From 46e8fd26c1e3f972f8a7a5c6ae754db226d41c53 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 30 Nov 2015 11:18:05 -0800 Subject: [PATCH 1/2] Updating BigQuery inserted timestamps to use seconds. As stated in the docs > BigQuery stores TIMESTAMP data internally as a > UNIX timestamp with microsecond precision > ...Specifies the number of seconds since the epoch. --- gcloud/bigquery/table.py | 8 ++++++-- gcloud/bigquery/test_table.py | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/gcloud/bigquery/table.py b/gcloud/bigquery/table.py index 45e74cebe2d3..9fc9a5c3b2ec 100644 --- a/gcloud/bigquery/table.py +++ b/gcloud/bigquery/table.py @@ -19,6 +19,7 @@ import six from gcloud._helpers import _datetime_from_microseconds +from gcloud._helpers import _microseconds_from_datetime from gcloud._helpers import _millis_from_datetime from gcloud.exceptions import NotFound from gcloud.bigquery._helpers import _rows_from_json @@ -657,8 +658,11 @@ def insert_data(self, row_info = {} for field, value in zip(self._schema, row): - if field.field_type == 'TIMESTAMP': - value = _millis_from_datetime(value) + if field.field_type == 'TIMESTAMP' and value is not None: + # BigQuery stores TIMESTAMP data internally as a + # UNIX timestamp with microsecond precision. + # Specifies the number of seconds since the epoch. + value = _microseconds_from_datetime(value) * 1e-6 row_info[field.name] = value info = {'json': row_info} diff --git a/gcloud/bigquery/test_table.py b/gcloud/bigquery/test_table.py index ddfeae95220c..f2b03668bcde 100644 --- a/gcloud/bigquery/test_table.py +++ b/gcloud/bigquery/test_table.py @@ -1060,7 +1060,7 @@ def test_fetch_data_w_record_schema(self): def test_insert_data_w_bound_client(self): import datetime from gcloud._helpers import UTC - from gcloud._helpers import _millis_from_datetime + from gcloud._helpers import _microseconds_from_datetime from gcloud.bigquery.table import SchemaField WHEN_TS = 1437767599.006 @@ -1084,9 +1084,12 @@ def test_insert_data_w_bound_client(self): ] def _row_data(row): + joined = None + if row[2] is not None: + joined = _microseconds_from_datetime(row[2]) * 1e-6 return {'full_name': row[0], 'age': row[1], - 'joined': _millis_from_datetime(row[2])} + 'joined': joined} SENT = { 'rows': [{'json': _row_data(row)} for row in ROWS], From b592be7b150840f893b66c9367a9778f713f3f65 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 30 Nov 2015 12:19:02 -0800 Subject: [PATCH 2/2] Updating BigQuery system test to verify timestamp handling. --- system_tests/bigquery.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/system_tests/bigquery.py b/system_tests/bigquery.py index 06db634346cc..b5114187c464 100644 --- a/system_tests/bigquery.py +++ b/system_tests/bigquery.py @@ -191,11 +191,17 @@ def test_update_table(self): self.assertEqual(found.mode, expected.mode) def test_load_table_then_dump_table(self): + import datetime + from gcloud._helpers import UTC + + NOW_SECONDS = 1448911495.484366 + NOW = datetime.datetime.utcfromtimestamp( + NOW_SECONDS).replace(tzinfo=UTC) ROWS = [ - ('Phred Phlyntstone', 32), - ('Bharney Rhubble', 33), - ('Wylma Phlyntstone', 29), - ('Bhettye Rhubble', 27), + ('Phred Phlyntstone', 32, NOW), + ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)), + ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)), + ('Bhettye Rhubble', 27, None), ] ROW_IDS = range(len(ROWS)) dataset = CLIENT.dataset(DATASET_NAME) @@ -206,7 +212,8 @@ def test_load_table_then_dump_table(self): full_name = bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED') age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED') - table = dataset.table(TABLE_NAME, schema=[full_name, age]) + now = bigquery.SchemaField('now', 'TIMESTAMP') + table = dataset.table(TABLE_NAME, schema=[full_name, age, now]) self.assertFalse(table.exists()) table.create() self.to_delete.insert(0, table)