From 176fb2afc9888c6b0cd74d590065b3002bdbf533 Mon Sep 17 00:00:00 2001 From: Alma Becerril Salas <47731219+abecerrilsalas@users.noreply.github.com> Date: Fri, 20 May 2022 12:22:56 -0700 Subject: [PATCH] feat: add support for table clones (#1235) * feat: add support for table clones * feat: clone test * feat: debugging * feat: more debugging * feat: more debugging * feat: even more debugging * feat: debugging test * feat: even more test debugging * feat: check * feat: modify test * feat: deleting print statement * feat: testing * feat: test update * feat: change table name * feat: debugging table name * feat: cleaning up test * feat: degubbing test * feat: add properties check to test * feat: test change * feat: added more properties * Update samples/snippets/requirements.txt Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/job/copy_.py | 3 ++ google/cloud/bigquery/table.py | 37 +++++++++++++++ samples/magics/requirements.txt | 2 +- tests/system/test_client.py | 54 ++++++++++++++++++++++ tests/unit/test_table.py | 74 ++++++++++++++++++++++++++++++ 7 files changed, 172 insertions(+), 1 deletion(-) diff --git a/docs/reference.rst b/docs/reference.rst index 4f655b09e..b886f1161 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -97,6 +97,7 @@ Table table.Row table.RowIterator table.SnapshotDefinition + table.CloneDefinition table.Table table.TableListItem table.TableReference diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 81b1285e3..5a4520476 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -101,6 +101,7 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row from 
google.cloud.bigquery.table import SnapshotDefinition +from google.cloud.bigquery.table import CloneDefinition from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioningType @@ -132,6 +133,7 @@ "RangePartitioning", "Row", "SnapshotDefinition", + "CloneDefinition", "TimePartitioning", "TimePartitioningType", # Jobs diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index 29558c01f..eb7f609a5 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -40,6 +40,9 @@ class OperationType: SNAPSHOT = "SNAPSHOT" """The source table type is TABLE and the destination table type is SNAPSHOT.""" + CLONE = "CLONE" + """The source table type is TABLE and the destination table type is CLONE.""" + RESTORE = "RESTORE" """The source table type is SNAPSHOT and the destination table type is TABLE.""" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 7b8c6441f..72eb1baf6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -356,6 +356,7 @@ class Table(_TableBase): "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", + "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", "time_partitioning": "timePartitioning", @@ -929,6 +930,19 @@ def snapshot_definition(self) -> Optional["SnapshotDefinition"]: snapshot_info = SnapshotDefinition(snapshot_info) return snapshot_info + @property + def clone_definition(self) -> Optional["CloneDefinition"]: + """Information about the clone. This value is set via clone creation. 
+ + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition + """ + clone_info = self._properties.get( + self._PROPERTY_TO_API_FIELD["clone_definition"] + ) + if clone_info is not None: + clone_info = CloneDefinition(clone_info) + return clone_info + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -1304,6 +1318,29 @@ def __init__(self, resource: Dict[str, Any]): ) +class CloneDefinition: + """Information about base table and clone time of the clone. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition + + Args: + resource: Clone definition representation returned from the API. + """ + + def __init__(self, resource: Dict[str, Any]): + self.base_table_reference = None + if "baseTableReference" in resource: + self.base_table_reference = TableReference.from_api_repr( + resource["baseTableReference"] + ) + + self.clone_time = None + if "cloneTime" in resource: + self.clone_time = google.cloud._helpers._rfc3339_to_datetime( + resource["cloneTime"] + ) + + class Row(object): """A BigQuery row. 
diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index cf682fd77..f26b4dc9b 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -5,7 +5,7 @@ grpcio==1.46.1 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython==8.3.0; python_version >= '3.9' -matplotlib==3.5.1 +matplotlib==3.5.2 pandas===1.3.5; python_version == '3.7' pandas==1.4.2; python_version >= '3.8' pyarrow==8.0.0 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 773ef3c90..49eb70a8b 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2190,3 +2190,57 @@ def test_table_snapshots(dataset_id): rows_iter = client.list_rows(source_table_path) rows = sorted(row.values() for row in rows_iter) assert rows == [(1, "one"), (2, "two")] + + +def test_table_clones(dataset_id): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + table_path_source = f"{client.project}.{dataset_id}.test_table_clone" + clone_table_path = f"{table_path_source}_clone" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_path_source, schema=schema) + ) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a clone before modifying the original table data.
+ copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.CLONE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=table_path_source, + destination=clone_table_path, + job_config=copy_config, + ) + copy_job.result() + + # List rows from the source table and compare them to rows from the clone. + rows_iter = client.list_rows(table_path_source) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + rows_iter = client.list_rows(clone_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Compare properties of the source and clone table. + source_table_props = client.get_table(table_path_source) + clone_table_props = client.get_table(clone_table_path) + + assert source_table_props.schema == clone_table_props.schema + assert source_table_props.num_bytes == clone_table_props.num_bytes + assert source_table_props.num_rows == clone_table_props.num_rows + assert source_table_props.description == clone_table_props.description diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ba35b2297..b5f2e58c6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -841,6 +841,40 @@ def test_snapshot_definition_set(self): 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC ) + def test_clone_definition_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.clone_definition is None + + def test_clone_definition_set(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import CloneDefinition + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["cloneDefinition"] = { + "baseTableReference": { + "projectId": "project_x", + "datasetId": 
"dataset_y", + "tableId": "table_z", + }, + "cloneTime": "2010-09-28T10:20:30.123Z", + } + + clone = table.clone_definition + + assert isinstance(clone, CloneDefinition) + assert clone.base_table_reference.path == ( + "/projects/project_x/datasets/dataset_y/tables/table_z" + ) + assert clone.clone_time == datetime.datetime( + 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC + ) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1789,6 +1823,46 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestCloneDefinition: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import CloneDefinition + + return CloneDefinition + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one(resource={}) + assert instance.base_table_reference is None + assert instance.clone_time is None + + def test_ctor_full_resource(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import TableReference + + resource = { + "baseTableReference": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "our-table", + }, + "cloneTime": "2005-06-07T19:35:02.123Z", + } + instance = self._make_one(resource) + + expected_table_ref = TableReference.from_string( + "my-project.your-dataset.our-table" + ) + assert instance.base_table_reference == expected_table_ref + + expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC) + assert instance.clone_time == expected_time + + class TestRow(unittest.TestCase): def test_row(self): from google.cloud.bigquery.table import Row