From c659d73b984a5108d0733874c164c55b71160ad9 Mon Sep 17 00:00:00 2001 From: blainehansen Date: Fri, 21 Sep 2018 16:51:39 -0600 Subject: [PATCH 1/9] master --- bigquery/google/cloud/bigquery/client.py | 16 ++++++++++++++-- bigquery/google/cloud/bigquery/job.py | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 89a26ab18743..288b727c4c94 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -122,11 +122,12 @@ class Client(ClientWithProject): """The scopes required for authenticating as a BigQuery consumer.""" def __init__( - self, project=None, credentials=None, _http=None, location=None): + self, project=None, credentials=None, _http=None, location=None, query_job_config=None): super(Client, self).__init__( project=project, credentials=credentials, _http=_http) self._connection = Connection(self) self._location = location + self._default_query_job_config = query_job_config @property def location(self): @@ -1187,7 +1188,7 @@ def extract_table( return extract_job def query( - self, query, job_config=None, job_id=None, job_id_prefix=None, + self, query, job_config=None, override_job_config=False, job_id=None, job_id_prefix=None, location=None, project=None, retry=DEFAULT_RETRY): """Run a SQL query. @@ -1226,6 +1227,17 @@ def query( if location is None: location = self.location + # BLAINE + # if they don't want to override, we need to merge what they passed + if not override_job_config and self._default_query_job_config: + if job_config: + # anything that's not defined on the incoming, should be filled in with the default + job_config = job_config.merge_job_config(self._default_query_job_config) + else: + job_config = self._default_query_job_config + # if they want to override (or if they never passed a default), + # we simply send whatever they sent + job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob( job_ref, query, client=self, job_config=job_config) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 7a4478551336..01a2850f86ae 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -819,6 +819,26 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) + # BLAINE + def merge_job_config(self, other_job_config): + """Merge this job config with another one. The other takes precedence with conflicting keys. + This is a naive one level merge. + + :rtype: :class:`google.cloud.bigquery.job._JobConfig` + :returns: A new job config. + """ + if self._job_type == other_job_config._job_type: + raise TypeError("attempted to merge two incompatible job types: " + repr(self._job_type) + ', ' + repr(other_job_config._job_type)) + + new_job_config = self.__class__() + new_job_config._properties = copy.deepcopy(self._properties) + + for key in list(other_job_config._properties.keys()): + if not self._properties.get(key): + new_job_config._properties[key] = other_job_config._properties[key] + + return new_job_config + @classmethod def from_api_repr(cls, resource): """Factory: construct a job configuration given its API representation From 79fe38e3d3d6448af5b3694554621bda8159ae2a Mon Sep 17 00:00:00 2001 From: blainehansen Date: Mon, 24 Sep 2018 15:29:39 -0600 Subject: [PATCH 2/9] working implementation of default QueryJobConfigs attached to Client --- bigquery/google/cloud/bigquery/client.py | 6 +- bigquery/google/cloud/bigquery/job.py | 15 +-- bigquery/tests/unit/test_client.py | 120 +++++++++++++++++++++++ bigquery/tests/unit/test_job.py | 24 +++++ 4 files changed, 157 insertions(+), 8 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 288b727c4c94..3e43572fba1f 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1231,8 +1231,10 @@ def query( # if they don't want to override, we need to merge what they passed if not override_job_config and self._default_query_job_config: if job_config: - # anything that's not defined on the incoming, should be filled in with the default - job_config = job_config.merge_job_config(self._default_query_job_config) + # anything that's not defined on the incoming that is in the default, + # should be filled in with the default + # the incoming therefore has precedence + job_config = job_config.fill_from_default(self._default_query_job_config) else: job_config = self._default_query_job_config # if they want to override (or if they never passed a default), diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 01a2850f86ae..317f8800eec5 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -820,22 +820,25 @@ def to_api_repr(self): return copy.deepcopy(self._properties) # BLAINE - def merge_job_config(self, other_job_config): + def fill_from_default(self, default_job_config): """Merge this job config with another one. The other takes precedence with conflicting keys. This is a naive one level merge. :rtype: :class:`google.cloud.bigquery.job._JobConfig` :returns: A new job config. """ - if self._job_type == other_job_config._job_type: - raise TypeError("attempted to merge two incompatible job types: " + repr(self._job_type) + ', ' + repr(other_job_config._job_type)) + if self._job_type != default_job_config._job_type: + raise TypeError("attempted to merge two incompatible job types: " + repr(self._job_type) + ', ' + repr(default_job_config._job_type)) new_job_config = self.__class__() new_job_config._properties = copy.deepcopy(self._properties) - for key in list(other_job_config._properties.keys()): - if not self._properties.get(key): - new_job_config._properties[key] = other_job_config._properties[key] + self_job_properties = self._properties[self._job_type] + new_job_properties = new_job_config._properties[self._job_type] + default_job_properties = default_job_config._properties[self._job_type] + for key in list(default_job_properties.keys()): + if not self_job_properties.get(key): + new_job_properties[key] = default_job_properties[key] return new_job_config diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index ee7bb7ca8632..f5e6768c4a41 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -96,6 +96,28 @@ def test_ctor_w_location(self): self.assertIs(client._connection.http, http) self.assertEqual(client.location, location) + # BLAINE + def test_ctor_w_query_job_config(self): + from google.cloud.bigquery._http import Connection + from google.cloud.bigquery import QueryJobConfig + + creds = _make_credentials() + http = object() + location = 'us-central' + job_config = QueryJobConfig() + job_config.dry_run = True + + client = self._make_one(project=self.PROJECT, credentials=creds, + _http=http, location=location, + query_job_config=job_config) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, creds) + self.assertIs(client._connection.http, http) + self.assertEqual(client.location, location) + + self.assertIsInstance(client._default_query_job_config, QueryJobConfig) + self.assertTrue(client._default_query_job_config.dry_run) + def test__get_query_results_miss_w_explicit_project_and_timeout(self): from google.cloud.exceptions import NotFound @@ -2707,6 +2729,104 @@ def test_query_w_explicit_project(self): data=resource, ) + def test_query_w_explicit_job_config(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'defaultDataset': { + 'projectId': self.PROJECT, + 'datasetId': 'some-dataset', + }, + 'useLegacySql': False, + 'useQueryCache': True, + 'maximumBytesBilled': '2000', + }, + }, + } + + creds = _make_credentials() + http = object() + # BLAINE + + from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference(self.PROJECT, 'some-dataset') + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + job_config = QueryJobConfig() + job_config.use_query_cache = True + job_config.maximum_bytes_billed = 2000 + + # override_job_config + client.query( + query, job_id=job_id, location=self.LOCATION, job_config=job_config) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + + def test_query_w_explicit_job_config_override(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + 'useQueryCache': True, + 'maximumBytesBilled': '2000', + }, + }, + } + + creds = _make_credentials() + http = object() + # BLAINE + + from google.cloud.bigquery import QueryJobConfig, DatasetReference + default_job_config = QueryJobConfig() + default_job_config.default_dataset = DatasetReference(self.PROJECT, 'some-dataset') + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + job_config = QueryJobConfig() + job_config.use_query_cache = True + job_config.maximum_bytes_billed = 2000 + + # override_job_config + client.query( + query, job_id=job_id, location=self.LOCATION, job_config=job_config, override_job_config=True) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + def test_query_w_client_location(self): job_id = 'some-job-id' query = 'select count(*) from persons' diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index f3a57439c508..52f075f67465 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -912,6 +912,30 @@ def test_ctor(self): self.assertEqual(job_config._job_type, self.JOB_TYPE) self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + def test_fill_from_default(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + default_job_config = QueryJobConfig() + default_job_config.use_query_cache = True + default_job_config.maximum_bytes_billed = 2000 + + final_job_config = job_config.fill_from_default(default_job_config) + self.assertTrue(final_job_config.dry_run) + self.assertTrue(final_job_config.use_query_cache) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + + # case where job types differ + basic_job_config = QueryJobConfig() + conflicting_job_config = self._make_one('conflicting_job_type') + self.assertNotEqual(basic_job_config._job_type, conflicting_job_config._job_type) + + with self.assertRaises(TypeError): + fail_job_config = basic_job_config.fill_from_default(conflicting_job_config) + @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') def test__get_sub_prop_wo_default(self, _get_sub_prop): job_config = self._make_one() From 7edcbba908770078414769d380b647d7ff9fc5e4 Mon Sep 17 00:00:00 2001 From: blainehansen Date: Mon, 24 Sep 2018 15:32:08 -0600 Subject: [PATCH 3/9] removing comments and help texts --- bigquery/google/cloud/bigquery/client.py | 1 - bigquery/google/cloud/bigquery/job.py | 1 - bigquery/tests/unit/test_client.py | 3 --- 3 files changed, 5 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 3e43572fba1f..3858aa54e576 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1227,7 +1227,6 @@ def query( if location is None: location = self.location - # BLAINE # if they don't want to override, we need to merge what they passed if not override_job_config and self._default_query_job_config: if job_config: diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 317f8800eec5..4bfb22fbc40b 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -819,7 +819,6 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - # BLAINE def fill_from_default(self, default_job_config): """Merge this job config with another one. The other takes precedence with conflicting keys. This is a naive one level merge. diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index f5e6768c4a41..605ef4e18c5b 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -96,7 +96,6 @@ def test_ctor_w_location(self): self.assertIs(client._connection.http, http) self.assertEqual(client.location, location) - # BLAINE def test_ctor_w_query_job_config(self): from google.cloud.bigquery._http import Connection from google.cloud.bigquery import QueryJobConfig @@ -2754,7 +2753,6 @@ def test_query_w_explicit_job_config(self): creds = _make_credentials() http = object() - # BLAINE from google.cloud.bigquery import QueryJobConfig, DatasetReference default_job_config = QueryJobConfig() @@ -2801,7 +2799,6 @@ def test_query_w_explicit_job_config_override(self): creds = _make_credentials() http = object() - # BLAINE from google.cloud.bigquery import QueryJobConfig, DatasetReference default_job_config = QueryJobConfig() From 856473961f6d7406509b196a8869451a82e7bd2e Mon Sep 17 00:00:00 2001 From: blainehansen Date: Mon, 24 Sep 2018 16:15:22 -0600 Subject: [PATCH 4/9] fixing lints --- bigquery/google/cloud/bigquery/client.py | 16 +++++++++++----- bigquery/google/cloud/bigquery/job.py | 10 +++++++--- bigquery/tests/unit/test_client.py | 20 +++++++++++++------- bigquery/tests/unit/test_job.py | 6 ++++-- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 3858aa54e576..7c95e58ca029 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -122,7 +122,8 @@ class Client(ClientWithProject): """The scopes required for authenticating as a BigQuery consumer.""" def __init__( - self, project=None, credentials=None, _http=None, location=None, query_job_config=None): + self, project=None, credentials=None, _http=None, + location=None, query_job_config=None): super(Client, self).__init__( project=project, credentials=credentials, _http=_http) self._connection = Connection(self) @@ -1188,7 +1189,9 @@ def extract_table( return extract_job def query( - self, query, job_config=None, override_job_config=False, job_id=None, job_id_prefix=None, + self, query, + job_config=None, override_job_config=False, + job_id=None, job_id_prefix=None, location=None, project=None, retry=DEFAULT_RETRY): """Run a SQL query. @@ -1227,13 +1230,16 @@ def query( if location is None: location = self.location - # if they don't want to override, we need to merge what they passed + # if they don't want to override, + # we need to merge what they passed if not override_job_config and self._default_query_job_config: if job_config: - # anything that's not defined on the incoming that is in the default, + # anything that's not defined on the incoming + # that is in the default, # should be filled in with the default # the incoming therefore has precedence - job_config = job_config.fill_from_default(self._default_query_job_config) + job_config = job_config.fill_from_default( + self._default_query_job_config) else: job_config = self._default_query_job_config # if they want to override (or if they never passed a default), diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 4bfb22fbc40b..fa8f876dc1c3 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -820,14 +820,18 @@ def to_api_repr(self): return copy.deepcopy(self._properties) def fill_from_default(self, default_job_config): - """Merge this job config with another one. The other takes precedence with conflicting keys. + """Merge this job config with another one. + The other takes precedence with conflicting keys. This is a naive one level merge. - + :rtype: :class:`google.cloud.bigquery.job._JobConfig` :returns: A new job config. """ if self._job_type != default_job_config._job_type: - raise TypeError("attempted to merge two incompatible job types: " + repr(self._job_type) + ', ' + repr(default_job_config._job_type)) + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + ', ' + + repr(default_job_config._job_type)) new_job_config = self.__class__() new_job_config._properties = copy.deepcopy(self._properties) diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 605ef4e18c5b..53bb81bbc392 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -113,7 +113,7 @@ def test_ctor_w_query_job_config(self): self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) self.assertEqual(client.location, location) - + self.assertIsInstance(client._default_query_job_config, QueryJobConfig) self.assertTrue(client._default_query_job_config.dry_run) @@ -2756,11 +2756,13 @@ def test_query_w_explicit_job_config(self): from google.cloud.bigquery import QueryJobConfig, DatasetReference default_job_config = QueryJobConfig() - default_job_config.default_dataset = DatasetReference(self.PROJECT, 'some-dataset') + default_job_config.default_dataset = DatasetReference( + self.PROJECT, 'some-dataset') default_job_config.maximum_bytes_billed = 1000 client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, query_job_config=default_job_config) + project=self.PROJECT, credentials=creds, + _http=http, query_job_config=default_job_config) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() @@ -2769,7 +2771,8 @@ def test_query_w_explicit_job_config(self): # override_job_config client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config) + query, job_id=job_id, location=self.LOCATION, + job_config=job_config) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -2802,11 +2805,13 @@ def test_query_w_explicit_job_config_override(self): from google.cloud.bigquery import QueryJobConfig, DatasetReference default_job_config = QueryJobConfig() - default_job_config.default_dataset = DatasetReference(self.PROJECT, 'some-dataset') + default_job_config.default_dataset = DatasetReference( + self.PROJECT, 'some-dataset') default_job_config.maximum_bytes_billed = 1000 client = self._make_one( - project=self.PROJECT, credentials=creds, _http=http, query_job_config=default_job_config) + project=self.PROJECT, credentials=creds, _http=http, + query_job_config=default_job_config) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() @@ -2815,7 +2820,8 @@ def test_query_w_explicit_job_config_override(self): # override_job_config client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config, override_job_config=True) + query, job_id=job_id, location=self.LOCATION, + job_config=job_config, override_job_config=True) # Check that query actually starts the job. conn.api_request.assert_called_once_with( diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 52f075f67465..c51bd848f066 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -931,10 +931,12 @@ def test_fill_from_default(self): # case where job types differ basic_job_config = QueryJobConfig() conflicting_job_config = self._make_one('conflicting_job_type') - self.assertNotEqual(basic_job_config._job_type, conflicting_job_config._job_type) + self.assertNotEqual( + basic_job_config._job_type, conflicting_job_config._job_type) with self.assertRaises(TypeError): - fail_job_config = basic_job_config.fill_from_default(conflicting_job_config) + basic_job_config.fill_from_default( + conflicting_job_config) @mock.patch('google.cloud.bigquery._helpers._get_sub_prop') def test__get_sub_prop_wo_default(self, _get_sub_prop): From b000dbbdbcef438766d183deeb7f46291b59c3c2 Mon Sep 17 00:00:00 2001 From: blainehansen Date: Tue, 25 Sep 2018 14:03:29 -0600 Subject: [PATCH 5/9] bringing coverage up to 100% --- bigquery/tests/unit/test_client.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 53bb81bbc392..2e7e467f5afb 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -2830,6 +2830,46 @@ def test_query_w_explicit_job_config_override(self): data=resource, ) + def test_query_w_client_default_config_no_incoming(self): + job_id = 'some-job-id' + query = 'select count(*) from persons' + resource = { + 'jobReference': { + 'jobId': job_id, + 'projectId': self.PROJECT, + 'location': self.LOCATION, + }, + 'configuration': { + 'query': { + 'query': query, + 'useLegacySql': False, + 'maximumBytesBilled': '1000', + }, + }, + } + + creds = _make_credentials() + http = object() + + from google.cloud.bigquery import QueryJobConfig + default_job_config = QueryJobConfig() + default_job_config.maximum_bytes_billed = 1000 + + client = self._make_one( + project=self.PROJECT, credentials=creds, _http=http, + query_job_config=default_job_config) + conn = client._connection = _make_connection(resource) + + client.query( + query, job_id=job_id, location=self.LOCATION) + + # Check that query actually starts the job. + conn.api_request.assert_called_once_with( + method='POST', + path='/projects/PROJECT/jobs', + data=resource, + ) + def test_query_w_client_location(self): job_id = 'some-job-id' query = 'select count(*) from persons' From e2dfaf27ec4b8245a6042d8b79583f5a4183be98 Mon Sep 17 00:00:00 2001 From: blainehansen Date: Tue, 25 Sep 2018 15:52:39 -0600 Subject: [PATCH 6/9] making revisions --- bigquery/google/cloud/bigquery/client.py | 19 ++++++++++++++----- bigquery/google/cloud/bigquery/job.py | 9 ++++++--- bigquery/tests/unit/test_job.py | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 7c95e58ca029..bdda002b79af 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -108,8 +108,11 @@ class Client(ClientWithProject): current object. This parameter should be considered private, and could change in the future. - location str: + location (str): (Optional) Default location for jobs / datasets / tables. + default_query_job_config (google.cloud.bigquery.job.QueryJobConfig): + (Optional) Default ``QueryJobConfig``. + Will be merged into job configs passed into the ``query`` method. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -123,12 +126,12 @@ class Client(ClientWithProject): def __init__( self, project=None, credentials=None, _http=None, - location=None, query_job_config=None): + location=None, default_query_job_config=None): super(Client, self).__init__( project=project, credentials=credentials, _http=_http) self._connection = Connection(self) self._location = location - self._default_query_job_config = query_job_config + self._default_query_job_config = default_query_job_config @property def location(self): @@ -1190,7 +1193,7 @@ def extract_table( def query( self, query, - job_config=None, override_job_config=False, + job_config=None, merge_job_config=True, job_id=None, job_id_prefix=None, location=None, project=None, retry=DEFAULT_RETRY): """Run a SQL query. @@ -1206,6 +1209,12 @@ def query( Keyword Arguments: job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the job. + merge_job_config (bool): + (Optional) Merge the passed ``job_config`` with the + ``default_query_job_config`` passed to the ``Client`` constructor. + The default value is ``True``, so to instead use only the + passed ``job_config`` without any merging, + pass ``False`` for this argument. job_id (str): (Optional) ID to use for the query job. job_id_prefix (str): (Optional) The prefix to use for a randomly generated job ID. @@ -1232,7 +1241,7 @@ def query( # if they don't want to override, # we need to merge what they passed - if not override_job_config and self._default_query_job_config: + if merge_job_config and self._default_query_job_config: if job_config: # anything that's not defined on the incoming # that is in the default, diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index fa8f876dc1c3..c6013a02d5b2 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -824,6 +824,10 @@ def fill_from_default(self, default_job_config): The other takes precedence with conflicting keys. This is a naive one level merge. + :type default_job_config: google.cloud.bigquery.job._JobConfig + :param default_job_config: + The default job config that will be used to fill in self. + :rtype: :class:`google.cloud.bigquery.job._JobConfig` :returns: A new job config. """ @@ -836,11 +840,10 @@ def fill_from_default(self, default_job_config): new_job_config = self.__class__() new_job_config._properties = copy.deepcopy(self._properties) - self_job_properties = self._properties[self._job_type] new_job_properties = new_job_config._properties[self._job_type] default_job_properties = default_job_config._properties[self._job_type] - for key in list(default_job_properties.keys()): - if not self_job_properties.get(key): + for key in default_job_properties: + if key not in new_job_properties: new_job_properties[key] = default_job_properties[key] return new_job_config diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index c51bd848f066..07a7f1b4add8 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -928,7 +928,7 @@ def test_fill_from_default(self): self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) - # case where job types differ + def test_fill_from_default_conflict(self): basic_job_config = QueryJobConfig() conflicting_job_config = self._make_one('conflicting_job_type') self.assertNotEqual( From c0a82ab2cca3163883668961f9368ed77366045d Mon Sep 17 00:00:00 2001 From: blainehansen Date: Tue, 25 Sep 2018 16:07:07 -0600 Subject: [PATCH 7/9] missed some changes --- bigquery/google/cloud/bigquery/client.py | 5 +++-- bigquery/tests/unit/test_client.py | 13 ++++++------- bigquery/tests/unit/test_job.py | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index bdda002b79af..4f9aea2158c6 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1210,8 +1210,9 @@ def query( job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the job. merge_job_config (bool): - (Optional) Merge the passed ``job_config`` with the - ``default_query_job_config`` passed to the ``Client`` constructor. + (Optional) Merge the passed ``job_config`` + with the ``default_query_job_config`` + passed to the ``Client`` constructor. The default value is ``True``, so to instead use only the passed ``job_config`` without any merging, pass ``False`` for this argument. diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 2e7e467f5afb..3347cb538a7f 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -108,7 +108,7 @@ def test_ctor_w_query_job_config(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http, location=location, - query_job_config=job_config) + default_query_job_config=job_config) self.assertIsInstance(client._connection, Connection) self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) @@ -2762,14 +2762,13 @@ def test_query_w_explicit_job_config(self): client = self._make_one( project=self.PROJECT, credentials=creds, - _http=http, query_job_config=default_job_config) + _http=http, default_query_job_config=default_job_config) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() job_config.use_query_cache = True job_config.maximum_bytes_billed = 2000 - # override_job_config client.query( query, job_id=job_id, location=self.LOCATION, job_config=job_config) @@ -2811,17 +2810,17 @@ def test_query_w_explicit_job_config_override(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, - query_job_config=default_job_config) + default_query_job_config=default_job_config) conn = client._connection = _make_connection(resource) job_config = QueryJobConfig() job_config.use_query_cache = True job_config.maximum_bytes_billed = 2000 - # override_job_config + # merge_job_config client.query( query, job_id=job_id, location=self.LOCATION, - job_config=job_config, override_job_config=True) + job_config=job_config, merge_job_config=False) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -2857,7 +2856,7 @@ def test_query_w_client_default_config_no_incoming(self): client = self._make_one( project=self.PROJECT, credentials=creds, _http=http, - query_job_config=default_job_config) + default_query_job_config=default_job_config) conn = client._connection = _make_connection(resource) client.query( diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 07a7f1b4add8..712da4c4416b 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -929,6 +929,8 @@ def test_fill_from_default(self): self.assertEqual(final_job_config.maximum_bytes_billed, 1000) def test_fill_from_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + basic_job_config = QueryJobConfig() conflicting_job_config = self._make_one('conflicting_job_type') self.assertNotEqual( From 0eac7b365d3e762e7af4b7f5f5aa79cfebbad638 Mon Sep 17 00:00:00 2001 From: blainehansen Date: Wed, 26 Sep 2018 11:25:52 -0600 Subject: [PATCH 8/9] making code tweaks --- bigquery/google/cloud/bigquery/client.py | 19 ++++++------------- bigquery/google/cloud/bigquery/job.py | 14 ++++++++------ bigquery/tests/unit/test_client.py | 6 ++++-- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 4f9aea2158c6..bc2dd2db3d95 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1193,7 +1193,7 @@ def extract_table( def query( self, query, - job_config=None, merge_job_config=True, + job_config=None, job_id=None, job_id_prefix=None, location=None, project=None, retry=DEFAULT_RETRY): """Run a SQL query. @@ -1209,13 +1209,10 @@ def query( Keyword Arguments: job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the job. - merge_job_config (bool): - (Optional) Merge the passed ``job_config`` - with the ``default_query_job_config`` - passed to the ``Client`` constructor. - The default value is ``True``, so to instead use only the - passed ``job_config`` without any merging, - pass ``False`` for this argument. + To override any options that were previously set in + the ``default_query_job_config`` given to the + ``Client`` constructor, manually set those options to ``None``, + or whatever value is preferred. job_id (str): (Optional) ID to use for the query job. job_id_prefix (str): (Optional) The prefix to use for a randomly generated job ID. @@ -1240,9 +1237,7 @@ def query( if location is None: location = self.location - # if they don't want to override, - # we need to merge what they passed - if merge_job_config and self._default_query_job_config: + if self._default_query_job_config: if job_config: # anything that's not defined on the incoming # that is in the default, @@ -1252,8 +1247,6 @@ def query( self._default_query_job_config) else: job_config = self._default_query_job_config - # if they want to override (or if they never passed a default), - # we simply send whatever they sent job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob( diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index c6013a02d5b2..ef726dcfaa28 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -838,13 +838,15 @@ def fill_from_default(self, default_job_config): + repr(default_job_config._job_type)) new_job_config = self.__class__() - new_job_config._properties = copy.deepcopy(self._properties) - new_job_properties = new_job_config._properties[self._job_type] - default_job_properties = default_job_config._properties[self._job_type] - for key in default_job_properties: - if key not in new_job_properties: - new_job_properties[key] = default_job_properties[key] + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type] \ + .update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties return new_job_config diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 3347cb538a7f..e49ec3670956 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -2792,6 +2792,7 @@ def test_query_w_explicit_job_config_override(self): 'configuration': { 'query': { 'query': query, + 'defaultDataset': None, 'useLegacySql': False, 'useQueryCache': True, 'maximumBytesBilled': '2000', @@ -2816,11 +2817,12 @@ def test_query_w_explicit_job_config_override(self): job_config = QueryJobConfig() job_config.use_query_cache = True job_config.maximum_bytes_billed = 2000 + job_config.default_dataset = None - # merge_job_config client.query( query, job_id=job_id, location=self.LOCATION, - job_config=job_config, merge_job_config=False) + job_config=job_config, + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( From 6d439dd3a7c42c750b98f012ce1b72317fc17b3a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Sep 2018 13:32:41 -0700 Subject: [PATCH 9/9] Make _JobConfig._fill_from_default semi-private. Also, update the docstrings to Google/Napoleon-style. --- bigquery/google/cloud/bigquery/client.py | 2 +- bigquery/google/cloud/bigquery/job.py | 20 +++++++++++--------- bigquery/tests/unit/test_job.py | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index bc2dd2db3d95..0723e9133af9 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -1243,7 +1243,7 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence - job_config = job_config.fill_from_default( + job_config = job_config._fill_from_default( self._default_query_job_config) else: job_config = self._default_query_job_config diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index ef726dcfaa28..7f89efa2d821 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -819,17 +819,19 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def fill_from_default(self, default_job_config): - """Merge this job config with another one. - The other takes precedence with conflicting keys. - This is a naive one level merge. + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. - :type default_job_config: google.cloud.bigquery.job._JobConfig - :param default_job_config: - The default job config that will be used to fill in self. + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. - :rtype: :class:`google.cloud.bigquery.job._JobConfig` - :returns: A new job config. + Arguments: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig A new (merged) job config. """ if self._job_type != default_job_config._job_type: raise TypeError( diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 712da4c4416b..97c22e211a77 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -923,7 +923,7 @@ def test_fill_from_default(self): default_job_config.use_query_cache = True default_job_config.maximum_bytes_billed = 2000 - final_job_config = job_config.fill_from_default(default_job_config) + final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.dry_run) self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) @@ -937,7 +937,7 @@ def test_fill_from_default_conflict(self): basic_job_config._job_type, conflicting_job_config._job_type) with self.assertRaises(TypeError): - basic_job_config.fill_from_default( + basic_job_config._fill_from_default( conflicting_job_config) @mock.patch('google.cloud.bigquery._helpers._get_sub_prop')