Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: retry 'job exceeded rate limits' for DDL queries #1794

Merged
merged 12 commits into from
Jan 24, 2024
2 changes: 1 addition & 1 deletion google/cloud/bigquery/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _should_retry(exc):
deadline on the retry object.
"""

job_retry_reasons = "rateLimitExceeded", "backendError"
job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded"


def _job_should_retry(exc):
Expand Down
69 changes: 69 additions & 0 deletions tests/unit/test_job_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import google.api_core.retry
import freezegun

from google.cloud.bigquery.client import Client
from google.cloud.bigquery import _job_helpers

from .helpers import make_connection


Expand Down Expand Up @@ -240,3 +243,69 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client):
),
):
job.result(job_retry=google.api_core.retry.Retry())


def test_query_and_wait_retries_job():
"""
Specific test for retrying DDL queries with "jobRateLimitExceeded" error:
https://github.com/googleapis/python-bigquery/issues/1790
"""
freezegun.freeze_time(auto_tick_seconds=100)
client = mock.create_autospec(Client)
client._call_api.__name__ = "_call_api"
client._call_api.__qualname__ = "Client._call_api"
client._call_api.__annotations__ = {}
client._call_api.__type_params__ = ()
client._call_api.side_effect = (
google.api_core.exceptions.BadRequest("jobRateLimitExceeded"),
google.api_core.exceptions.InternalServerError("jobRateLimitExceeded"),
google.api_core.exceptions.BadRequest("jobRateLimitExceeded"),
{
"jobReference": {
"projectId": "response-project",
"jobId": "abc",
"location": "response-location",
},
"jobComplete": True,
"schema": {
"fields": [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "age", "type": "INT64", "mode": "NULLABLE"},
],
},
"rows": [
{"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
{"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
],
},
)
rows = _job_helpers.query_and_wait(
client,
query="SELECT 1",
location="request-location",
project="request-project",
job_config=None,
page_size=None,
max_results=None,
retry=google.api_core.retry.Retry(
lambda exc: isinstance(exc, google.api_core.exceptions.BadRequest),
multiplier=1.0,
).with_deadline(
200.0
), # Since auto_tick_seconds is 100, we should get at least 1 retry.
job_retry=google.api_core.retry.Retry(
lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError),
multiplier=1.0,
).with_deadline(600.0),
)
assert len(list(rows)) == 4

# For this code path, where the query has finished immediately, we should
# only be calling the jobs.query API and no other request path.
request_path = "/projects/request-project/queries"
for call in client._call_api.call_args_list:
_, kwargs = call
assert kwargs["method"] == "POST"
assert kwargs["path"] == request_path
27 changes: 27 additions & 0 deletions tests/unit/test_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,30 @@ def test_DEFAULT_JOB_RETRY_deadline():

# Make sure we can retry the job at least once.
assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline


def test_DEFAULT_JOB_RETRY_job_rate_limit_exceeded_retry_predicate():
"""Tests the retry predicate specifically for jobRateLimitExceeded."""
from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
from google.api_core.exceptions import ClientError

# Non-ClientError exceptions should never trigger a retry
assert not DEFAULT_JOB_RETRY._predicate(TypeError())

# ClientError without specific reason shouldn't trigger a retry
assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail"))

# ClientError with generic reason "idk" shouldn't trigger a retry
assert not DEFAULT_JOB_RETRY._predicate(
ClientError("fail", errors=[dict(reason="idk")])
)

# ClientError with reason "jobRateLimitExceeded" should trigger a retry
assert DEFAULT_JOB_RETRY._predicate(
ClientError("fail", errors=[dict(reason="jobRateLimitExceeded")])
)

# Other retryable reasons should still work as expected
assert DEFAULT_JOB_RETRY._predicate(
ClientError("fail", errors=[dict(reason="backendError")])
)