Skip to content

Commit 9839a51

Browse files
authored
bigquery: Dataset constructor takes a DatasetReference (#4036)
1 parent b6504a1 commit 9839a51

File tree

8 files changed

+100
-131
lines changed

8 files changed

+100
-131
lines changed

bigquery/google/cloud/bigquery/client.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -185,8 +185,6 @@ def create_dataset(self, dataset):
185185
:rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`"
186186
:returns: a new ``Dataset`` returned from the service.
187187
"""
188-
if dataset.project is None:
189-
dataset._project = self.project
190188
path = '/projects/%s/datasets' % (dataset.project,)
191189
api_response = self._connection.api_request(
192190
method='POST', path=path, data=dataset._build_resource())
@@ -244,8 +242,6 @@ def update_dataset(self, dataset, fields):
244242
:rtype: :class:`google.cloud.bigquery.dataset.Dataset`
245243
:returns: the modified ``Dataset`` instance
246244
"""
247-
if dataset.project is None:
248-
dataset._project = self.project
249245
path = '/projects/%s/datasets/%s' % (dataset.project,
250246
dataset.dataset_id)
251247
partial = {}

bigquery/google/cloud/bigquery/dataset.py

Lines changed: 11 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,10 @@ class DatasetReference(object):
106106
"""
107107

108108
def __init__(self, project, dataset_id):
109+
if not isinstance(project, six.string_types):
110+
raise ValueError("Pass a string for project")
111+
if not isinstance(dataset_id, six.string_types):
112+
raise ValueError("Pass a string for dataset_id")
109113
self._project = project
110114
self._dataset_id = dataset_id
111115

@@ -154,27 +158,15 @@ class Dataset(object):
154158
See
155159
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
156160
157-
:type dataset_id: str
158-
:param dataset_id: the ID of the dataset
159-
160-
:type access_entries: list of :class:`AccessEntry`
161-
:param access_entries: roles granted to entities for this dataset
162-
163-
:type project: str
164-
:param project: (Optional) project ID for the dataset.
161+
:type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference`
162+
:param dataset_ref: a pointer to a dataset
165163
"""
166164

167-
_access_entries = None
168-
169-
def __init__(self,
170-
dataset_id,
171-
access_entries=(),
172-
project=None):
173-
self._dataset_id = dataset_id
165+
def __init__(self, dataset_ref):
166+
self._project = dataset_ref.project
167+
self._dataset_id = dataset_ref.dataset_id
174168
self._properties = {'labels': {}}
175-
# Let the @property do validation.
176-
self.access_entries = access_entries
177-
self._project = project
169+
self._access_entries = ()
178170

179171
@property
180172
def project(self):
@@ -406,7 +398,7 @@ def from_api_repr(cls, resource):
406398
raise KeyError('Resource lacks required identity information:'
407399
'["datasetReference"]["datasetId"]')
408400
dataset_id = dsr['datasetId']
409-
dataset = cls(dataset_id, project=dsr['projectId'])
401+
dataset = cls(DatasetReference(dsr['projectId'], dataset_id))
410402
dataset._set_properties(resource)
411403
return dataset
412404

bigquery/google/cloud/bigquery/job.py

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -842,8 +842,9 @@ def from_api_repr(cls, resource, client):
842842
"""
843843
job_id, config = cls._get_resource_config(resource)
844844
dest_config = config['destinationTable']
845-
dataset = Dataset(dest_config['datasetId'],
846-
project=dest_config['projectId'])
845+
ds_ref = DatasetReference(dest_config['projectId'],
846+
dest_config['datasetId'],)
847+
dataset = Dataset(ds_ref)
847848
table_ref = TableReference(dataset, dest_config['tableId'])
848849
destination = Table(table_ref, client=client)
849850
source_urls = config.get('sourceUris', ())
@@ -959,8 +960,9 @@ def from_api_repr(cls, resource, client):
959960
"""
960961
job_id, config = cls._get_resource_config(resource)
961962
dest_config = config['destinationTable']
962-
dataset = Dataset(dest_config['datasetId'],
963-
project=dest_config['projectId'])
963+
ds_ref = DatasetReference(dest_config['projectId'],
964+
dest_config['datasetId'],)
965+
dataset = Dataset(ds_ref)
964966
table_ref = TableReference(dataset, dest_config['tableId'])
965967
destination = Table(table_ref, client=client)
966968
sources = []
@@ -972,9 +974,9 @@ def from_api_repr(cls, resource, client):
972974
"Resource missing 'sourceTables' / 'sourceTable'")
973975
source_configs = [single]
974976
for source_config in source_configs:
975-
dataset = Dataset(source_config['datasetId'],
976-
project=source_config['projectId'])
977-
table_ref = TableReference(dataset, source_config['tableId'])
977+
ds_ref = DatasetReference(source_config['projectId'],
978+
source_config['datasetId'])
979+
table_ref = ds_ref.table(source_config['tableId'])
978980
sources.append(Table(table_ref, client=client))
979981
job = cls(job_id, destination, sources, client=client)
980982
job._set_properties(resource)
@@ -1426,17 +1428,17 @@ def _copy_configuration_properties(self, configuration):
14261428
dest_local = self._destination_table_resource()
14271429
if dest_remote != dest_local:
14281430
project = dest_remote['projectId']
1429-
dataset = Dataset(dest_remote['datasetId'], project=project)
1431+
dataset = Dataset(DatasetReference(project,
1432+
dest_remote['datasetId']))
14301433
self.destination = dataset.table(dest_remote['tableId'])
14311434

14321435
def_ds = configuration.get('defaultDataset')
14331436
if def_ds is None:
14341437
if self.default_dataset is not None:
14351438
del self.default_dataset
14361439
else:
1437-
self.default_dataset = Dataset(def_ds['datasetId'],
1438-
project=def_ds['projectId'])
1439-
1440+
self.default_dataset = Dataset(
1441+
DatasetReference(def_ds['projectId'], def_ds['datasetId']))
14401442
udf_resources = []
14411443
for udf_mapping in configuration.get(self._UDF_KEY, ()):
14421444
key_val, = udf_mapping.items()
@@ -1587,11 +1589,11 @@ def referenced_tables(self):
15871589

15881590
t_project = table['projectId']
15891591

1590-
ds_name = table['datasetId']
1591-
t_dataset = datasets_by_project_name.get((t_project, ds_name))
1592+
ds_id = table['datasetId']
1593+
t_dataset = datasets_by_project_name.get((t_project, ds_id))
15921594
if t_dataset is None:
1593-
t_dataset = Dataset(ds_name, project=t_project)
1594-
datasets_by_project_name[(t_project, ds_name)] = t_dataset
1595+
t_dataset = DatasetReference(t_project, ds_id)
1596+
datasets_by_project_name[(t_project, ds_id)] = t_dataset
15951597

15961598
t_name = table['tableId']
15971599
tables.append(t_dataset.table(t_name))

bigquery/tests/system.py

Lines changed: 27 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,7 @@ def _still_in_use(bad_request):
112112

113113
def test_create_dataset(self):
114114
DATASET_ID = _make_dataset_id('create_dataset')
115-
dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID))
116-
self.to_delete.append(dataset)
115+
dataset = self.temp_dataset(DATASET_ID)
117116

118117
self.assertTrue(_dataset_exists(dataset))
119118
self.assertEqual(dataset.dataset_id, DATASET_ID)
@@ -122,7 +121,7 @@ def test_create_dataset(self):
122121
def test_get_dataset(self):
123122
DATASET_ID = _make_dataset_id('get_dataset')
124123
client = Config.CLIENT
125-
dataset_arg = Dataset(DATASET_ID, project=client.project)
124+
dataset_arg = Dataset(client.dataset(DATASET_ID))
126125
dataset_arg.friendly_name = 'Friendly'
127126
dataset_arg.description = 'Description'
128127
dataset = retry_403(client.create_dataset)(dataset_arg)
@@ -135,10 +134,7 @@ def test_get_dataset(self):
135134
self.assertEqual(got.description, 'Description')
136135

137136
def test_update_dataset(self):
138-
dataset = retry_403(Config.CLIENT.create_dataset)(
139-
Dataset(_make_dataset_id('update_dataset')))
140-
self.to_delete.append(dataset)
141-
137+
dataset = self.temp_dataset(_make_dataset_id('update_dataset'))
142138
self.assertTrue(_dataset_exists(dataset))
143139
self.assertIsNone(dataset.friendly_name)
144140
self.assertIsNone(dataset.description)
@@ -163,16 +159,15 @@ def test_update_dataset(self):
163159
self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'})
164160

165161
# TODO(jba): test that read-modify-write with ETag works.
162+
166163
def test_list_datasets(self):
167164
datasets_to_create = [
168165
'new' + unique_resource_id(),
169166
'newer' + unique_resource_id(),
170167
'newest' + unique_resource_id(),
171168
]
172169
for dataset_id in datasets_to_create:
173-
created_dataset = retry_403(Config.CLIENT.create_dataset)(
174-
Dataset(dataset_id))
175-
self.to_delete.append(created_dataset)
170+
self.temp_dataset(dataset_id)
176171

177172
# Retrieve the datasets.
178173
iterator = Config.CLIENT.list_datasets()
@@ -184,9 +179,7 @@ def test_list_datasets(self):
184179
self.assertEqual(len(created), len(datasets_to_create))
185180

186181
def test_create_table(self):
187-
dataset = retry_403(Config.CLIENT.create_dataset)(
188-
Dataset(_make_dataset_id('create_table')))
189-
self.to_delete.append(dataset)
182+
dataset = self.temp_dataset(_make_dataset_id('create_table'))
190183

191184
TABLE_NAME = 'test_table'
192185
full_name = bigquery.SchemaField('full_name', 'STRING',
@@ -217,9 +210,7 @@ def test_get_table_w_public_dataset(self):
217210

218211
def test_list_dataset_tables(self):
219212
DATASET_ID = _make_dataset_id('list_tables')
220-
dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID))
221-
self.to_delete.append(dataset)
222-
213+
dataset = self.temp_dataset(DATASET_ID)
223214
# Retrieve tables before any are created for the dataset.
224215
iterator = Config.CLIENT.list_dataset_tables(dataset)
225216
all_tables = list(iterator)
@@ -252,9 +243,7 @@ def test_list_dataset_tables(self):
252243
self.assertEqual(len(created), len(tables_to_create))
253244

254245
def test_patch_table(self):
255-
dataset = retry_403(Config.CLIENT.create_dataset)(
256-
Dataset(_make_dataset_id('patch_table')))
257-
self.to_delete.append(dataset)
246+
dataset = self.temp_dataset(_make_dataset_id('patch_table'))
258247

259248
TABLE_NAME = 'test_table'
260249
full_name = bigquery.SchemaField('full_name', 'STRING',
@@ -273,9 +262,7 @@ def test_patch_table(self):
273262
self.assertEqual(table.description, 'Description')
274263

275264
def test_update_table(self):
276-
dataset = retry_403(Config.CLIENT.create_dataset)(
277-
Dataset(_make_dataset_id('update_table')))
278-
self.to_delete.append(dataset)
265+
dataset = self.temp_dataset(_make_dataset_id('update_table'))
279266

280267
TABLE_NAME = 'test_table'
281268
full_name = bigquery.SchemaField('full_name', 'STRING',
@@ -316,10 +303,7 @@ def test_insert_data_then_dump_table(self):
316303
]
317304
ROW_IDS = range(len(ROWS))
318305

319-
dataset = retry_403(Config.CLIENT.create_dataset)(
320-
Dataset(_make_dataset_id('insert_data_then_dump')))
321-
self.to_delete.append(dataset)
322-
306+
dataset = self.temp_dataset(_make_dataset_id('insert_data_then_dump'))
323307
TABLE_NAME = 'test_table'
324308
full_name = bigquery.SchemaField('full_name', 'STRING',
325309
mode='REQUIRED')
@@ -358,10 +342,7 @@ def test_load_table_from_local_file_then_dump_table(self):
358342
]
359343
TABLE_NAME = 'test_table'
360344

361-
dataset = retry_403(Config.CLIENT.create_dataset)(
362-
Dataset(_make_dataset_id('load_local_then_dump')))
363-
self.to_delete.append(dataset)
364-
345+
dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
365346
full_name = bigquery.SchemaField('full_name', 'STRING',
366347
mode='REQUIRED')
367348
age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
@@ -406,10 +387,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
406387
("orange", 590),
407388
("red", 650)]
408389

409-
dataset = retry_403(Config.CLIENT.create_dataset)(
410-
Dataset(_make_dataset_id('load_local_then_dump')))
411-
self.to_delete.append(dataset)
412-
390+
dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
413391
table = Table(dataset.table(TABLE_NAME), client=Config.CLIENT)
414392
self.to_delete.insert(0, table)
415393

@@ -467,9 +445,7 @@ def test_load_table_from_storage_then_dump_table(self):
467445

468446
self.to_delete.insert(0, blob)
469447

470-
dataset = retry_403(Config.CLIENT.create_dataset)(
471-
Dataset(_make_dataset_id('load_gcs_then_dump')))
472-
self.to_delete.append(dataset)
448+
dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
473449

474450
full_name = bigquery.SchemaField('full_name', 'STRING',
475451
mode='REQUIRED')
@@ -536,10 +512,7 @@ def test_load_table_from_storage_w_autodetect_schema(self):
536512

537513
self.to_delete.insert(0, blob)
538514

539-
dataset = retry_403(Config.CLIENT.create_dataset)(
540-
Dataset(_make_dataset_id('load_gcs_then_dump')))
541-
self.to_delete.append(dataset)
542-
515+
dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
543516
table_ref = dataset.table(table_name)
544517

545518
job = Config.CLIENT.load_table_from_storage(
@@ -589,9 +562,7 @@ def _load_table_for_extract_table(
589562
blob.upload_from_file(csv_read, content_type='text/csv')
590563
self.to_delete.insert(0, blob)
591564

592-
dataset = retry_403(Config.CLIENT.create_dataset)(
593-
Dataset(table.dataset_id))
594-
self.to_delete.append(dataset)
565+
dataset = self.temp_dataset(table.dataset_id)
595566
table_ref = dataset.table(table.table_id)
596567
job = Config.CLIENT.load_table_from_storage(
597568
'bq_extract_storage_test_' + local_id, table_ref, gs_url)
@@ -676,8 +647,7 @@ def test_job_cancel(self):
676647
TABLE_NAME = 'test_table'
677648
QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME)
678649

679-
dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(DATASET_ID))
680-
self.to_delete.append(dataset)
650+
dataset = self.temp_dataset(DATASET_ID)
681651

682652
full_name = bigquery.SchemaField('full_name', 'STRING',
683653
mode='REQUIRED')
@@ -866,9 +836,7 @@ def test_dbapi_fetchall(self):
866836
def _load_table_for_dml(self, rows, dataset_id, table_id):
867837
from google.cloud._testing import _NamedTemporaryFile
868838

869-
dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_id))
870-
self.to_delete.append(dataset)
871-
839+
dataset = self.temp_dataset(dataset_id)
872840
greeting = bigquery.SchemaField(
873841
'greeting', 'STRING', mode='NULLABLE')
874842
table = Table(dataset.table(table_id), schema=[greeting],
@@ -1190,8 +1158,7 @@ def test_dump_table_w_public_data(self):
11901158
DATASET_ID = 'samples'
11911159
TABLE_NAME = 'natality'
11921160

1193-
dataset = Dataset(DATASET_ID, project=PUBLIC)
1194-
table_ref = dataset.table(TABLE_NAME)
1161+
table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME)
11951162
table = Config.CLIENT.get_table(table_ref)
11961163
self._fetch_single_page(table)
11971164

@@ -1242,10 +1209,7 @@ def test_insert_nested_nested(self):
12421209
('Some value', record)
12431210
]
12441211
table_name = 'test_table'
1245-
dataset = retry_403(Config.CLIENT.create_dataset)(
1246-
Dataset(_make_dataset_id('issue_2951')))
1247-
self.to_delete.append(dataset)
1248-
1212+
dataset = self.temp_dataset(_make_dataset_id('issue_2951'))
12491213
table = Table(dataset.table(table_name), schema=schema,
12501214
client=Config.CLIENT)
12511215
table.create()
@@ -1260,10 +1224,8 @@ def test_insert_nested_nested(self):
12601224

12611225
def test_create_table_insert_fetch_nested_schema(self):
12621226
table_name = 'test_table'
1263-
dataset = retry_403(Config.CLIENT.create_dataset)(
1264-
Dataset(_make_dataset_id('create_table_nested_schema')))
1265-
self.to_delete.append(dataset)
1266-
1227+
dataset = self.temp_dataset(
1228+
_make_dataset_id('create_table_nested_schema'))
12671229
schema = _load_json_schema()
12681230
table = Table(dataset.table(table_name), schema=schema,
12691231
client=Config.CLIENT)
@@ -1321,6 +1283,12 @@ def test_create_table_insert_fetch_nested_schema(self):
13211283
e_favtime = datetime.datetime(*parts[0:6])
13221284
self.assertEqual(found[7], e_favtime) # FavoriteTime
13231285

1286+
def temp_dataset(self, dataset_id):
1287+
dataset = retry_403(Config.CLIENT.create_dataset)(
1288+
Dataset(Config.CLIENT.dataset(dataset_id)))
1289+
self.to_delete.append(dataset)
1290+
return dataset
1291+
13241292

13251293
def _job_done(instance):
13261294
return instance.state.lower() == 'done'

0 commit comments

Comments
 (0)