Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add default value expression to SchemaField #1408

Merged
merged 7 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,38 @@ class SchemaField(object):
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.

max_length: Maximum length of fields with STRING or BYTES type.

default_value_expression: str, Optional
Used to specify the default value of a field using a SQL expression. It can only be set for
top level fields (columns).

You can use a struct or array expression to specify default value for the entire struct or
array. The valid SQL expressions are:

- Literals for all data types, including STRUCT and ARRAY.

- The following functions:

`CURRENT_TIMESTAMP`
`CURRENT_TIME`
`CURRENT_DATE`
`CURRENT_DATETIME`
`GENERATE_UUID`
`RAND`
`SESSION_USER`
`ST_GEOPOINT`

- Struct or array composed with the above allowed functions, for example:

"[CURRENT_DATE(), DATE '2020-01-01']"
"""

def __init__(
self,
name: str,
field_type: str,
mode: str = "NULLABLE",
default_value_expression: str = None,
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
fields: Iterable["SchemaField"] = (),
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
Expand All @@ -115,6 +140,8 @@ def __init__(
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
self._properties["description"] = description
if default_value_expression is not None:
self._properties["defaultValueExpression"] = default_value_expression
if precision is not _DEFAULT_VALUE:
self._properties["precision"] = precision
if scale is not _DEFAULT_VALUE:
Expand Down Expand Up @@ -154,13 +181,16 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

default_value_expression = api_repr.get("defaultValueExpression", None)

if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
mode=mode.upper(),
default_value_expression=default_value_expression,
description=description,
name=api_repr["name"],
policy_tags=policy_tags,
Expand Down Expand Up @@ -197,6 +227,11 @@ def is_nullable(self):
"""bool: whether 'mode' is 'nullable'."""
return self.mode == "NULLABLE"

@property
def default_value_expression(self):
    """Optional[str]: SQL expression supplying the field's default value.

    Read from the ``defaultValueExpression`` entry of the field's
    properties; ``None`` when that entry is absent.
    """
    properties = self._properties
    return properties.get("defaultValueExpression")

@property
def description(self):
"""Optional[str]: description for the field."""
Expand Down Expand Up @@ -260,7 +295,7 @@ def _key(self):
field_type = self.field_type.upper() if self.field_type is not None else None

# Type can temporarily be set to None if the code needs a SchemaField instance,
# but has npt determined the exact type of the field yet.
# but has not determined the exact type of the field yet.
if field_type is not None:
if field_type == "STRING" or field_type == "BYTES":
if self.max_length is not None:
Expand All @@ -281,6 +316,7 @@ def _key(self):
field_type,
# Mode is always str, if not given it defaults to a str value
self.mode.upper(), # pytype: disable=attribute-error
self.default_value_expression,
self.description,
self._fields,
policy_tags,
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ def get(self, key: str, default: Any = None) -> Any:
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
None

The default value can be overrided with the ``default`` parameter.
The default value can be overridden with the ``default`` parameter.

>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
''
Expand Down
62 changes: 62 additions & 0 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,68 @@ def test_create_table_with_real_custom_policy(self):
list(table.schema[1].policy_tags.names), [child_policy_tag.name]
)

def test_create_table_with_default_value_expression(self):
    """Create a table whose columns declare default value expressions.

    Checks two things against the live service:

    1. The schema fetched back for the created table reports the same
       ``default_value_expression`` for every column that was sent.
    2. Rows inserted without a value for a column come back with that
       column populated from its default value expression.
    """
    dataset = self.temp_dataset(
        _make_dataset_id("create_table_with_default_value_expression")
    )

    table_id = "test_table"
    string_field_name = "username"
    timestamp_field_name = "timestamp_field_with_default_value_expression"

    string_default_val_expression = "'FOO'"
    timestamp_default_val_expression = "CURRENT_TIMESTAMP"

    schema = [
        bigquery.SchemaField(
            string_field_name,
            "STRING",
            default_value_expression=string_default_val_expression,
        ),
        bigquery.SchemaField(
            timestamp_field_name,
            "TIMESTAMP",
            default_value_expression=timestamp_default_val_expression,
        ),
    ]
    table_arg = Table(dataset.table(table_id), schema=schema)
    self.assertFalse(_table_exists(table_arg))

    table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    self.assertTrue(_table_exists(table))

    # Fetch the created table and its metadata to verify that the default
    # value expression is assigned to fields
    remote_table = Config.CLIENT.get_table(table)
    remote_schema = remote_table.schema
    self.assertEqual(remote_schema, schema)

    # Key the expressions by *field name*. Comparing field names against the
    # expression strings would never match and the checks would be skipped.
    expected_expressions = {
        string_field_name: string_default_val_expression,
        timestamp_field_name: timestamp_default_val_expression,
    }
    remote_expressions = {
        field.name: field.default_value_expression for field in remote_schema
    }
    self.assertEqual(remote_expressions, expected_expressions)

    # Insert rows into the created table to verify default values are populated
    # when value is not provided
    NOW_SECONDS = 1448911495.484366
    NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC)

    # Rows to insert.
    # Row #1 omits the TIMESTAMP column, so it gets CURRENT_TIMESTAMP.
    # Row #2 omits the STRING column, so it gets the literal 'FOO'.
    ROWS = [{string_field_name: "john_doe"}, {timestamp_field_name: NOW}]

    errors = Config.CLIENT.insert_rows(table, ROWS)
    self.assertEqual(len(errors), 0)

    # Get list of inserted rows. The order in which rows are returned is not
    # relied upon: rows are looked up by their username value instead.
    rows = list(Config.CLIENT.list_rows(table))
    self.assertEqual(len(rows), 2)
    rows_by_username = {row.get(string_field_name): row for row in rows}

    # The row inserted without a username must have received the default.
    self.assertEqual(set(rows_by_username), {"john_doe", "FOO"})

    # The row inserted without a timestamp must have received a default one.
    self.assertIsInstance(
        rows_by_username["john_doe"].get(timestamp_field_name),
        datetime.datetime,
    )
    self.assertIsInstance(
        rows_by_username["FOO"].get(timestamp_field_name), datetime.datetime
    )

def test_create_table_w_time_partitioning_w_clustering_fields(self):
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.table import TimePartitioningType
Expand Down
40 changes: 28 additions & 12 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8395,9 +8395,19 @@ def test_schema_from_json_with_file_path(self):
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep",
"STRING",
"NULLABLE",
description="sales representative",
),
SchemaField(
"sales",
"FLOAT",
"NULLABLE",
description="total sales",
),
]

client = self._make_client()
Expand Down Expand Up @@ -8441,9 +8451,11 @@ def test_schema_from_json_with_file_object(self):
]"""

expected = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

client = self._make_client()
Expand Down Expand Up @@ -8477,9 +8489,11 @@ def test_schema_to_json_with_file_path(self):
]

schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

client = self._make_client()
Expand Down Expand Up @@ -8521,9 +8535,11 @@ def test_schema_to_json_with_file_object(self):
]

schema_list = [
SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
SchemaField(
"rep", "STRING", "NULLABLE", description="sales representative"
),
SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]

fake_file = io.StringIO()
Expand Down
9 changes: 7 additions & 2 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def test_constructor_defaults(self):
self.assertIsNone(field.description)
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.default_value_expression)

def test_constructor_explicit(self):
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
field = self._make_one(
"test",
"STRING",
Expand All @@ -58,10 +60,12 @@ def test_constructor_explicit(self):
"projects/f/locations/g/taxonomies/h/policyTags/i",
)
),
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
self.assertEqual(field.mode, "REQUIRED")
self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION)
self.assertEqual(field.description, "Testing")
self.assertEqual(field.fields, ())
self.assertEqual(
Expand Down Expand Up @@ -182,6 +186,7 @@ def test_from_api_repr_defaults(self):
self.assertEqual(field.field_type, "RECORD")
self.assertEqual(field.mode, "NULLABLE")
self.assertEqual(len(field.fields), 0)
self.assertEqual(field.default_value_expression, None)

# Keys not present in API representation shouldn't be included in
# _properties.
Expand Down Expand Up @@ -527,12 +532,12 @@ def test___hash__not_equals(self):

def test___repr__(self):
field1 = self._make_one("field1", "STRING")
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)"
self.assertEqual(repr(field1), expected)

def test___repr__type_not_set(self):
field1 = self._make_one("field1", field_type=None)
expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)"
expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)"
self.assertEqual(repr(field1), expected)

def test___repr__evaluable_no_policy_tags(self):
Expand Down