feat: add support for table clones (#1235)
* feat: add support for table clones

* feat: clone test

* feat: debugging

* feat: more debugging

* feat: more debugging

* feat: even more debugging

* feat: debugging test

* feat: even more test debugging

* feat: check

* feat: modify test

* feat: deleting print statement

* feat: testing

* feat: test update

* feat: change table name

* feat: debugging table name

* feat: cleaning up test

* feat: debugging test

* feat: add properties check to test

* feat: test change

* feat: added more properties

* Update samples/snippets/requirements.txt

Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com>

Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com>
Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com>
3 people authored May 20, 2022
1 parent 6573f67 commit 176fb2a
Showing 7 changed files with 172 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/reference.rst
@@ -97,6 +97,7 @@ Table
table.Row
table.RowIterator
table.SnapshotDefinition
table.CloneDefinition
table.Table
table.TableListItem
table.TableReference
2 changes: 2 additions & 0 deletions google/cloud/bigquery/__init__.py
@@ -101,6 +101,7 @@
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import Row
from google.cloud.bigquery.table import SnapshotDefinition
from google.cloud.bigquery.table import CloneDefinition
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioningType
@@ -132,6 +133,7 @@
"RangePartitioning",
"Row",
"SnapshotDefinition",
"CloneDefinition",
"TimePartitioning",
"TimePartitioningType",
# Jobs
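The two `__init__.py` hunks make `CloneDefinition` importable from the package root and list it in `__all__`. A quick check of the new surface, assuming an installed library version that includes this commit:

from google.cloud import bigquery

# The top-level export and the table-module class are the same object.
assert bigquery.CloneDefinition is bigquery.table.CloneDefinition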
3 changes: 3 additions & 0 deletions google/cloud/bigquery/job/copy_.py
@@ -40,6 +40,9 @@ class OperationType:
SNAPSHOT = "SNAPSHOT"
"""The source table type is TABLE and the destination table type is SNAPSHOT."""

CLONE = "CLONE"
"""The source table type is TABLE and the destination table type is CLONE."""

RESTORE = "RESTORE"
"""The source table type is SNAPSHOT and the destination table type is TABLE."""

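The new `OperationType.CLONE` value plugs into the existing copy-job machinery: a clone is requested as an ordinary copy job whose operation type is `CLONE`, mirroring how `SNAPSHOT` and `RESTORE` already work. A minimal sketch, assuming a default client and placeholder table IDs:

from google.cloud import bigquery
from google.cloud.bigquery import CopyJobConfig, OperationType

client = bigquery.Client()

# Placeholder IDs; the source must be an existing standard table.
source = "my-project.my_dataset.my_table"
destination = "my-project.my_dataset.my_table_clone"

config = CopyJobConfig()
config.operation_type = OperationType.CLONE

copy_job = client.copy_table(source, destination, job_config=config)
copy_job.result()  # Block until the clone exists.

The system test further down also sets a write disposition on the same config; only the operation type is what distinguishes a clone from a plain copy.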
37 changes: 37 additions & 0 deletions google/cloud/bigquery/table.py
@@ -356,6 +356,7 @@ class Table(_TableBase):
"time_partitioning": "timePartitioning",
"schema": "schema",
"snapshot_definition": "snapshotDefinition",
"clone_definition": "cloneDefinition",
"streaming_buffer": "streamingBuffer",
"self_link": "selfLink",
"time_partitioning": "timePartitioning",
@@ -929,6 +930,19 @@ def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
snapshot_info = SnapshotDefinition(snapshot_info)
return snapshot_info

@property
def clone_definition(self) -> Optional["CloneDefinition"]:
"""Information about the clone. This value is set via clone creation.
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
"""
clone_info = self._properties.get(
self._PROPERTY_TO_API_FIELD["clone_definition"]
)
if clone_info is not None:
clone_info = CloneDefinition(clone_info)
return clone_info

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
@@ -1304,6 +1318,29 @@ def __init__(self, resource: Dict[str, Any]):
)


class CloneDefinition:
"""Information about base table and clone time of the clone.
See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition
Args:
resource: Clone definition representation returned from the API.
"""

def __init__(self, resource: Dict[str, Any]):
self.base_table_reference = None
if "baseTableReference" in resource:
self.base_table_reference = TableReference.from_api_repr(
resource["baseTableReference"]
)

self.clone_time = None
if "cloneTime" in resource:
self.clone_time = google.cloud._helpers._rfc3339_to_datetime(
resource["cloneTime"]
)


class Row(object):
"""A BigQuery row.
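Together, the `clone_definition` property and the `CloneDefinition` class surface the `cloneDefinition` field the API returns for cloned tables. A short sketch of reading it back, assuming the clone created in the previous example:

from google.cloud import bigquery

client = bigquery.Client()
table = client.get_table("my-project.my_dataset.my_table_clone")  # placeholder ID

if table.clone_definition is not None:
    # TableReference pointing at the table the clone was created from.
    print(table.clone_definition.base_table_reference)
    # Timezone-aware datetime marking when the clone was taken.
    print(table.clone_definition.clone_time)
else:
    print("Not a clone; regular tables leave cloneDefinition unset.")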
2 changes: 1 addition & 1 deletion samples/magics/requirements.txt
@@ -5,7 +5,7 @@ grpcio==1.46.1
ipython===7.31.1; python_version == '3.7'
ipython===8.0.1; python_version == '3.8'
ipython==8.3.0; python_version >= '3.9'
-matplotlib==3.5.2
+matplotlib==3.5.1
pandas===1.3.5; python_version == '3.7'
pandas==1.4.2; python_version >= '3.8'
pyarrow==8.0.0
54 changes: 54 additions & 0 deletions tests/system/test_client.py
@@ -2190,3 +2190,57 @@ def test_table_snapshots(dataset_id):
rows_iter = client.list_rows(source_table_path)
rows = sorted(row.values() for row in rows_iter)
assert rows == [(1, "one"), (2, "two")]


def test_table_clones(dataset_id):
from google.cloud.bigquery import CopyJobConfig
from google.cloud.bigquery import OperationType

client = Config.CLIENT

table_path_source = f"{client.project}.{dataset_id}.test_table_clone"
clone_table_path = f"{table_path_source}_clone"

# Create the table before loading so that the column order is predictable.
schema = [
bigquery.SchemaField("foo", "INTEGER"),
bigquery.SchemaField("bar", "STRING"),
]
source_table = helpers.retry_403(Config.CLIENT.create_table)(
Table(table_path_source, schema=schema)
)

# Populate the table with initial data.
rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}]
load_job = Config.CLIENT.load_table_from_json(rows, source_table)
load_job.result()

# Now create a clone before modifying the original table data.
copy_config = CopyJobConfig()
copy_config.operation_type = OperationType.CLONE
copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

copy_job = client.copy_table(
sources=table_path_source,
destination=clone_table_path,
job_config=copy_config,
)
copy_job.result()

# List rows from the source table and compare them to rows from the clone.
rows_iter = client.list_rows(table_path_source)
rows = sorted(row.values() for row in rows_iter)
assert rows == [(1, "one"), (2, "two")]

rows_iter = client.list_rows(clone_table_path)
rows = sorted(row.values() for row in rows_iter)
assert rows == [(1, "one"), (2, "two")]

# Compare properties of the source and clone table.
source_table_props = client.get_table(table_path_source)
clone_table_props = client.get_table(clone_table_path)

assert source_table_props.schema == clone_table_props.schema
assert source_table_props.num_bytes == clone_table_props.num_bytes
assert source_table_props.num_rows == clone_table_props.num_rows
assert source_table_props.description == clone_table_props.description
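The system test checks that rows and key properties match immediately after cloning. One thing it does not exercise is storage independence: a clone owns its own data, so later writes to the source do not show up in it. A hypothetical extension in the same style, reusing the test's client, table_path_source, and clone_table_path:

# Load one more row into the source after the clone was taken.
more_rows = [{"foo": 3, "bar": "three"}]
load_job = client.load_table_from_json(more_rows, table_path_source)
load_job.result()

# The clone keeps its own storage, so the new row is absent from it.
rows_iter = client.list_rows(clone_table_path)
rows = sorted(row.values() for row in rows_iter)
assert rows == [(1, "one"), (2, "two")]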
74 changes: 74 additions & 0 deletions tests/unit/test_table.py
@@ -841,6 +841,40 @@ def test_snapshot_definition_set(self):
2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC
)

def test_clone_definition_not_set(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)

assert table.clone_definition is None

def test_clone_definition_set(self):
from google.cloud._helpers import UTC
from google.cloud.bigquery.table import CloneDefinition

dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)

table._properties["cloneDefinition"] = {
"baseTableReference": {
"projectId": "project_x",
"datasetId": "dataset_y",
"tableId": "table_z",
},
"cloneTime": "2010-09-28T10:20:30.123Z",
}

clone = table.clone_definition

assert isinstance(clone, CloneDefinition)
assert clone.base_table_reference.path == (
"/projects/project_x/datasets/dataset_y/tables/table_z"
)
assert clone.clone_time == datetime.datetime(
2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC
)

def test_description_setter_bad_value(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -1789,6 +1823,46 @@ def test_ctor_full_resource(self):
assert instance.snapshot_time == expected_time


class TestCloneDefinition:
@staticmethod
def _get_target_class():
from google.cloud.bigquery.table import CloneDefinition

return CloneDefinition

@classmethod
def _make_one(cls, *args, **kwargs):
klass = cls._get_target_class()
return klass(*args, **kwargs)

def test_ctor_empty_resource(self):
instance = self._make_one(resource={})
assert instance.base_table_reference is None
assert instance.clone_time is None

def test_ctor_full_resource(self):
from google.cloud._helpers import UTC
from google.cloud.bigquery.table import TableReference

resource = {
"baseTableReference": {
"projectId": "my-project",
"datasetId": "your-dataset",
"tableId": "our-table",
},
"cloneTime": "2005-06-07T19:35:02.123Z",
}
instance = self._make_one(resource)

expected_table_ref = TableReference.from_string(
"my-project.your-dataset.our-table"
)
assert instance.base_table_reference == expected_table_ref

expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC)
assert instance.clone_time == expected_time


class TestRow(unittest.TestCase):
def test_row(self):
from google.cloud.bigquery.table import Row
