-
Notifications
You must be signed in to change notification settings - Fork 309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
perf: DB-API uses more efficient query_and_wait
when no job ID is provided
#1747
Changes from all commits
b8c583a
6a8059d
1f0e38e
4401725
d078941
660aa76
476bcd7
05d6a3e
c16e4be
222f91b
73e5817
9508121
543481d
85f1cab
c0e6c86
d4a322d
e0b2d2e
9daccbd
bba36d2
adf0b49
6dfbf92
765a644
e461ebe
895b6d0
d5345cd
f75d8ab
baff9d6
221898d
18f825a
5afbc41
08167d8
3e10ea4
dc5e5be
f1556bc
db71a1b
bd7e767
4ffec17
95b3b0e
ed317ec
f08dac3
5b324ee
3b20ed7
a376bd6
4019bbf
7c3d813
304799a
425f6b0
ae06cd2
2baa5d3
e819421
4771602
b435e41
64bf4a4
225049e
5154866
3b60b5b
83679ab
0bd35e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1566,6 +1566,7 @@ def __init__( | |
job_id: Optional[str] = None, | ||
query_id: Optional[str] = None, | ||
project: Optional[str] = None, | ||
num_dml_affected_rows: Optional[int] = None, | ||
): | ||
super(RowIterator, self).__init__( | ||
client, | ||
|
@@ -1592,6 +1593,7 @@ def __init__( | |
self._job_id = job_id | ||
self._query_id = query_id | ||
self._project = project | ||
self._num_dml_affected_rows = num_dml_affected_rows | ||
|
||
@property | ||
def _billing_project(self) -> Optional[str]: | ||
|
@@ -1616,6 +1618,16 @@ def location(self) -> Optional[str]: | |
""" | ||
return self._location | ||
|
||
@property | ||
def num_dml_affected_rows(self) -> Optional[int]: | ||
"""If this RowIterator is the result of a DML query, the number of | ||
rows that were affected. | ||
|
||
See: | ||
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows | ||
""" | ||
return self._num_dml_affected_rows | ||
|
||
@property | ||
def project(self) -> Optional[str]: | ||
"""GCP Project ID where these rows are read from.""" | ||
|
@@ -1635,7 +1647,10 @@ def _is_almost_completely_cached(self): | |
This is useful to know, because we can avoid alternative download | ||
mechanisms. | ||
""" | ||
if self._first_page_response is None: | ||
if ( | ||
not hasattr(self, "_first_page_response") | ||
or self._first_page_response is None | ||
): | ||
return False | ||
|
||
total_cached_rows = len(self._first_page_response.get(self._items_key, [])) | ||
|
@@ -1655,7 +1670,7 @@ def _is_almost_completely_cached(self): | |
|
||
return False | ||
|
||
def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): | ||
def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): | ||
"""Returns True if the BigQuery Storage API can be used. | ||
|
||
Returns: | ||
|
@@ -1669,8 +1684,9 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): | |
if self._table is None: | ||
return False | ||
|
||
# The developer is manually paging through results if this is set. | ||
if self.next_page_token is not None: | ||
# The developer has already started paging through results if | ||
# next_page_token is set. | ||
if hasattr(self, "next_page_token") and self.next_page_token is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just for my education, it looks like attribute There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was purely for some failing unit tests where this superclass was mocked out. |
||
return False | ||
|
||
if self._is_almost_completely_cached(): | ||
|
@@ -1726,7 +1742,7 @@ def schema(self): | |
|
||
@property | ||
def total_rows(self): | ||
"""int: The total number of rows in the table.""" | ||
"""int: The total number of rows in the table or query results.""" | ||
return self._total_rows | ||
|
||
def _maybe_warn_max_results( | ||
|
@@ -1752,7 +1768,7 @@ def _maybe_warn_max_results( | |
def _to_page_iterable( | ||
self, bqstorage_download, tabledata_list_download, bqstorage_client=None | ||
): | ||
if not self._validate_bqstorage(bqstorage_client, False): | ||
if not self._should_use_bqstorage(bqstorage_client, False): | ||
bqstorage_client = None | ||
|
||
result_pages = ( | ||
|
@@ -1882,7 +1898,7 @@ def to_arrow( | |
|
||
self._maybe_warn_max_results(bqstorage_client) | ||
|
||
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): | ||
if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): | ||
create_bqstorage_client = False | ||
bqstorage_client = None | ||
|
||
|
@@ -2223,7 +2239,7 @@ def to_dataframe( | |
|
||
self._maybe_warn_max_results(bqstorage_client) | ||
|
||
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): | ||
if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): | ||
create_bqstorage_client = False | ||
bqstorage_client = None | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because we set at line 1591
self._first_page_response = first_page_response
, this attribute will always exist? Maybe we can check whether the value is None or not.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This also was needed for some tests where we have a mock row iterator but want to test with a real implementation of this method.