From cc852af72188c20bc1b0e151d84e556d9ec59881 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Dec 2017 16:08:52 -0800 Subject: [PATCH] BigQuery: Use DatasetListItem for client.list_datasets (#4439) * BigQuery: Use DatasetListItem for client.list_datasets Listing datasets only includes a subset of the properties available on a dataset. The DatasetListItem class is used to explicitly document which features are available and to prevent confusion from trying to use the resulting object in other contexts, like updating. * Fix lint errors. * Make dataset & table reference required, labels optional. * Fix lint error --- bigquery/google/cloud/bigquery/client.py | 11 ++- bigquery/google/cloud/bigquery/dataset.py | 108 +++++++++++++++++++++- bigquery/google/cloud/bigquery/table.py | 36 ++++++-- bigquery/tests/unit/test_client.py | 4 +- bigquery/tests/unit/test_dataset.py | 97 +++++++++++++++++++ bigquery/tests/unit/test_table.py | 54 +++++++++++ 6 files changed, 292 insertions(+), 18 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 34fc9114ce92..5d3a70416c22 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -36,6 +36,7 @@ from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -181,8 +182,10 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. - accessible to the current client. + :returns: + Iterator of + :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + associated with the client's project. """ extra_params = {} if include_all: @@ -1275,10 +1278,10 @@ def _item_to_dataset(iterator, resource): :type resource: dict :param resource: An item to be converted to a dataset. - :rtype: :class:`.Dataset` + :rtype: :class:`.DatasetListItem` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource) + return DatasetListItem(resource) def _item_to_job(iterator, resource): diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 25e00405e2c8..c8f588671ab5 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -281,8 +281,7 @@ def full_dataset_id(self): @property def reference(self): - """A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to - this dataset. + """A reference to this dataset. Returns: google.cloud.bigquery.dataset.DatasetReference: @@ -420,7 +419,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the dataset's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -546,4 +545,105 @@ def table(self, table_id): :rtype: :class:`~google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ - return TableReference(self, table_id) + return TableReference(self.reference, table_id) + + +class DatasetListItem(object): + """A read-only dataset resource from a list operation. + + For performance reasons, the BigQuery API only includes some of the + dataset properties when listing datasets. Notably, + :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing. + + For a full list of the properties that the BigQuery API returns, see the + `REST documentation for datasets.list + `_. + + + Args: + resource (dict): + A dataset-like resource object from a dataset list response. A + ``datasetReference`` property is required. + + Raises: + ValueError: + If ``datasetReference`` or one of its required members is missing + from ``resource``. + """ + + def __init__(self, resource): + if 'datasetReference' not in resource: + raise ValueError('resource must contain a datasetReference value') + if 'projectId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a projectId value") + if 'datasetId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a datasetId value") + self._properties = resource + + @property + def project(self): + """Project bound to the dataset. + + :rtype: str + :returns: the project. + """ + return self._properties['datasetReference']['projectId'] + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._properties['datasetReference']['datasetId'] + + @property + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". + + :rtype: str, or ``NoneType`` + :returns: the ID (None until set from the server). + """ + return self._properties.get('id') + + @property + def friendly_name(self): + """Title of the dataset. + + :rtype: str, or ``NoneType`` + :returns: The name as set by the user, or None (the default). + """ + return self._properties.get('friendlyName') + + @property + def labels(self): + """Labels for the dataset. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties.get('labels', {}) + + @property + def reference(self): + """A reference to this dataset. + + Returns: + google.cloud.bigquery.dataset.DatasetReference: + A pointer to this dataset + """ + return DatasetReference(self.project, self.dataset_id) + + def table(self, table_id): + """Constructs a TableReference. + + :type table_id: str + :param table_id: the ID of the table. + + :rtype: :class:`~google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self.reference, table_id) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index d7e4745fa8c8..d240fa6d0910 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -49,7 +49,7 @@ def _reference_getter(table): this table. Returns: - google.cloud.bigquery.table.TableReference: pointer to this table + google.cloud.bigquery.table.TableReference: pointer to this table. """ from google.cloud.bigquery import dataset @@ -295,7 +295,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the table's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -756,10 +756,28 @@ class TableListItem(object): Args: resource (dict): - A table-like resource object from a table list response. + A table-like resource object from a table list response. A + ``tableReference`` property is required. + + Raises: + ValueError: + If ``tableReference`` or one of its required members is missing + from ``resource``. """ def __init__(self, resource): + if 'tableReference' not in resource: + raise ValueError('resource must contain a tableReference value') + if 'projectId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a projectId value") + if 'datasetId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a datasetId value") + if 'tableId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a tableId value") + self._properties = resource @property @@ -769,7 +787,7 @@ def project(self): Returns: str: the project ID of the table. """ - return self._properties.get('tableReference', {}).get('projectId') + return self._properties['tableReference']['projectId'] @property def dataset_id(self): @@ -778,7 +796,7 @@ def dataset_id(self): Returns: str: the dataset ID of the table. """ - return self._properties.get('tableReference', {}).get('datasetId') + return self._properties['tableReference']['datasetId'] @property def table_id(self): @@ -787,7 +805,7 @@ def table_id(self): Returns: str: the table ID. """ - return self._properties.get('tableReference', {}).get('tableId') + return self._properties['tableReference']['tableId'] reference = property(_reference_getter) @@ -842,8 +860,10 @@ def partition_expiration(self): Returns: int: The time in ms for partition expiration """ - return int( - self._properties.get('timePartitioning', {}).get('expirationMs')) + expiration = self._properties.get( + 'timePartitioning', {}).get('expirationMs') + if expiration is not None: + return int(expiration) @property def friendly_name(self): diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 721181e3cc81..f54feacdd93d 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetListItem DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' @@ -215,7 +215,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): - self.assertIsInstance(found, Dataset) + self.assertIsInstance(found, DatasetListItem) self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index e9e2f0dec813..33c38720beac 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -404,6 +404,10 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): dataset.labels = None + def test_labels_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.labels, {}) + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -460,3 +464,96 @@ def test_table(self): self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) + + +class TestDatasetListItem(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetListItem + + return DatasetListItem + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'kind': 'bigquery#dataset', + 'id': '{}:{}'.format(project, dataset_id), + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + 'friendlyName': 'Data of the Test', + 'labels': { + 'some-stuff': 'this-is-a-label', + }, + } + + dataset = self._make_one(resource) + self.assertEqual(dataset.project, project) + self.assertEqual(dataset.dataset_id, dataset_id) + self.assertEqual( + dataset.full_dataset_id, + '{}:{}'.format(project, dataset_id)) + self.assertEqual(dataset.reference.project, project) + self.assertEqual(dataset.reference.dataset_id, dataset_id) + self.assertEqual(dataset.friendly_name, 'Data of the Test') + self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label') + + def test_ctor_missing_properties(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + dataset = self._make_one(resource) + self.assertEqual(dataset.project, 'testproject') + self.assertEqual(dataset.dataset_id, 'testdataset') + self.assertIsNone(dataset.full_dataset_id) + self.assertIsNone(dataset.friendly_name) + self.assertEqual(dataset.labels, {}) + + def test_ctor_wo_project(self): + resource = { + 'datasetReference': { + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + + def test_table(self): + from google.cloud.bigquery.table import TableReference + + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + } + dataset = self._make_one(resource) + table = dataset.table('table_id') + self.assertIsInstance(table, TableReference) + self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.project, project) diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index dffa815511f1..a97bbe19c7fe 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -841,6 +841,60 @@ def test_ctor_view(self): # Server default for useLegacySql is True. self.assertTrue(table.view_use_legacy_sql) + def test_ctor_missing_properties(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + table = self._make_one(resource) + self.assertEqual(table.project, 'testproject') + self.assertEqual(table.dataset_id, 'testdataset') + self.assertEqual(table.table_id, 'testtable') + self.assertIsNone(table.full_table_id) + self.assertIsNone(table.friendly_name) + self.assertIsNone(table.table_type) + self.assertIsNone(table.partitioning_type) + self.assertIsNone(table.partition_expiration) + self.assertEqual(table.labels, {}) + self.assertIsNone(table.view_use_legacy_sql) + + def test_ctor_wo_project(self): + resource = { + 'tableReference': { + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_table(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + class TestRow(unittest.TestCase):