Skip to content

Commit

Permalink
feat: support RANGE in schema (#1746)
Browse files Browse the repository at this point in the history
* feat: support RANGE in schema

* lint

* fix python 3.7 error

* remove unused test method

* address comments

* add system test

* correct range json schema

* json format

* change system test to adjust to upstream table

* fix systest

* remove insert row with range

* systest

* add unit test

* fix mypy error

* error

* address comments
  • Loading branch information
Linchin authored Jan 8, 2024
1 parent 132c14b commit 8585747
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 1 deletion.
2 changes: 2 additions & 0 deletions google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
from google.cloud.bigquery.routine import RemoteFunctionOptions
from google.cloud.bigquery.schema import PolicyTagList
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.schema import FieldElementType
from google.cloud.bigquery.standard_sql import StandardSqlDataType
from google.cloud.bigquery.standard_sql import StandardSqlField
from google.cloud.bigquery.standard_sql import StandardSqlStructType
Expand Down Expand Up @@ -158,6 +159,7 @@
"RemoteFunctionOptions",
# Shared helpers
"SchemaField",
"FieldElementType",
"PolicyTagList",
"UDFResource",
"ExternalConfig",
Expand Down
73 changes: 72 additions & 1 deletion google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import collections
import enum
from typing import Any, Dict, Iterable, Optional, Union
from typing import Any, Dict, Iterable, Optional, Union, cast

from google.cloud.bigquery import standard_sql
from google.cloud.bigquery.enums import StandardSqlTypeNames
Expand Down Expand Up @@ -66,6 +66,46 @@ class _DefaultSentinel(enum.Enum):
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE


class FieldElementType(object):
    """Represents the type of a field element.

    Args:
        element_type (str): The type of a field element. The value is
            upper-cased before it is stored.
    """

    def __init__(self, element_type: str):
        self._properties: Dict[str, str] = {"type": element_type.upper()}

    @property
    def element_type(self):
        """Optional[str]: The upper-cased type name of this field element."""
        return self._properties.get("type")

    @classmethod
    def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]:
        """Factory: construct a FieldElementType given its API representation.

        Args:
            api_repr (Optional[Dict[str, str]]): field element type as
                returned from the API. ``None`` or an empty mapping yields
                ``None``.

        Returns:
            Optional[google.cloud.bigquery.FieldElementType]:
                Python object, as parsed from ``api_repr``, or ``None`` when
                ``api_repr`` is empty.

        Raises:
            KeyError: If ``api_repr`` is non-empty but has no ``"type"`` key.
        """
        if not api_repr:
            return None
        # The constructor already upper-cases the value, so no extra
        # normalization is needed here.
        return cls(api_repr["type"])

    def to_api_repr(self) -> dict:
        """Construct the API resource representation of this field element type.

        Returns:
            Dict[str, str]: Field element type represented as an API resource.
        """
        # Hand out a copy so callers that mutate the returned mapping (or
        # embed it in another resource) cannot alter this instance's state.
        return dict(self._properties)


class SchemaField(object):
"""Describe a single field within a table schema.
Expand Down Expand Up @@ -117,6 +157,12 @@ class SchemaField(object):
- Struct or array composed with the above allowed functions, for example:
"[CURRENT_DATE(), DATE '2020-01-01']"
range_element_type: FieldElementType, str, Optional
The subtype of the RANGE, if the type of this field is RANGE. If
the type is RANGE, this field is required. Possible values for the
field element type of a RANGE include `DATE`, `DATETIME` and
`TIMESTAMP`.
"""

def __init__(
Expand All @@ -131,6 +177,7 @@ def __init__(
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
range_element_type: Union[FieldElementType, str, None] = None,
):
self._properties: Dict[str, Any] = {
"name": name,
Expand All @@ -152,6 +199,11 @@ def __init__(
self._properties["policyTags"] = (
policy_tags.to_api_repr() if policy_tags is not None else None
)
if isinstance(range_element_type, str):
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()

self._fields = tuple(fields)

@staticmethod
Expand Down Expand Up @@ -186,6 +238,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

if api_repr.get("rangeElementType"):
range_element_type = cast(dict, api_repr.get("rangeElementType"))
element_type = range_element_type.get("type")
else:
element_type = None

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
Expand All @@ -197,6 +255,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
precision=cls.__get_int(api_repr, "precision"),
scale=cls.__get_int(api_repr, "scale"),
max_length=cls.__get_int(api_repr, "maxLength"),
range_element_type=element_type,
)

@property
Expand Down Expand Up @@ -252,6 +311,18 @@ def max_length(self):
"""Optional[int]: Maximum length for the STRING or BYTES field."""
return self._properties.get("maxLength")

@property
def range_element_type(self):
    """Optional[FieldElementType]: The subtype of the RANGE, if the
    type of this field is RANGE.

    Built from the ``rangeElementType`` entry of this field's properties;
    ``None`` when that entry is missing or empty.

    Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`,
    `"DATETIME"` or `"TIMESTAMP"`.
    """
    raw_element_type = self._properties.get("rangeElementType")
    if not raw_element_type:
        return None
    return FieldElementType.from_api_repr(raw_element_type)

@property
def fields(self):
"""Optional[tuple]: Subfields contained in this field.
Expand Down
8 changes: 8 additions & 0 deletions tests/data/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@
"mode" : "NULLABLE",
"name" : "FavoriteNumber",
"type" : "NUMERIC"
},
{
"mode" : "NULLABLE",
"name" : "TimeRange",
"type" : "RANGE",
"rangeElementType": {
"type": "DATETIME"
}
}
]
}
84 changes: 84 additions & 0 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def test_constructor_subfields(self):
self.assertEqual(field.fields[0], sub_field1)
self.assertEqual(field.fields[1], sub_field2)

def test_constructor_range(self):
    """A RANGE field built from a FieldElementType keeps every attribute."""
    from google.cloud.bigquery.schema import FieldElementType

    datetime_element = FieldElementType("DATETIME")
    range_field = self._make_one(
        "test",
        "RANGE",
        mode="REQUIRED",
        description="Testing",
        range_element_type=datetime_element,
    )

    self.assertEqual(range_field.name, "test")
    self.assertEqual(range_field.field_type, "RANGE")
    self.assertEqual(range_field.mode, "REQUIRED")
    self.assertEqual(range_field.description, "Testing")

    # The element type is read back through the public property.
    exposed = range_field.range_element_type
    self.assertEqual(exposed.element_type, "DATETIME")

def test_constructor_range_str(self):
    """A RANGE field accepts its element type as a plain string."""
    constructor_kwargs = {
        "mode": "REQUIRED",
        "description": "Testing",
        "range_element_type": "DATETIME",
    }
    range_field = self._make_one("test", "RANGE", **constructor_kwargs)

    self.assertEqual(range_field.name, "test")
    self.assertEqual(range_field.field_type, "RANGE")
    self.assertEqual(range_field.mode, "REQUIRED")
    self.assertEqual(range_field.description, "Testing")

    # The string is surfaced as an object exposing ``element_type``.
    exposed = range_field.range_element_type
    self.assertEqual(exposed.element_type, "DATETIME")

def test_to_api_repr(self):
from google.cloud.bigquery.schema import PolicyTagList

Expand Down Expand Up @@ -160,6 +190,7 @@ def test_from_api_repr(self):
self.assertEqual(field.fields[0].name, "bar")
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
self.assertEqual(field.range_element_type, None)

def test_from_api_repr_policy(self):
field = self._get_target_class().from_api_repr(
Expand All @@ -178,6 +209,23 @@ def test_from_api_repr_policy(self):
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")

def test_from_api_repr_range(self):
    """``from_api_repr`` parses a RANGE resource, including its element type."""
    resource = {
        "mode": "nullable",
        "description": "test_range",
        "name": "foo",
        "type": "range",
        "rangeElementType": {"type": "DATETIME"},
    }
    schema_field_cls = self._get_target_class()

    parsed = schema_field_cls.from_api_repr(resource)

    self.assertEqual(parsed.name, "foo")
    # Lower-case ``type`` and ``mode`` in the resource come back upper-cased.
    self.assertEqual(parsed.field_type, "RANGE")
    self.assertEqual(parsed.mode, "NULLABLE")
    self.assertEqual(parsed.description, "test_range")
    self.assertEqual(len(parsed.fields), 0)
    self.assertEqual(parsed.range_element_type.element_type, "DATETIME")

def test_from_api_repr_defaults(self):
field = self._get_target_class().from_api_repr(
{"name": "foo", "type": "record"}
Expand All @@ -192,8 +240,10 @@ def test_from_api_repr_defaults(self):
# _properties.
self.assertIsNone(field.description)
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.range_element_type)
self.assertNotIn("description", field._properties)
self.assertNotIn("policyTags", field._properties)
self.assertNotIn("rangeElementType", field._properties)

def test_name_property(self):
name = "lemon-ness"
Expand Down Expand Up @@ -566,6 +616,40 @@ def test___repr__evaluable_with_policy_tags(self):
assert field == evaled_field


class TestFieldElementType(unittest.TestCase):
    """Unit tests for ``google.cloud.bigquery.schema.FieldElementType``."""

    @staticmethod
    def _get_target_class():
        """Return the class under test, imported lazily."""
        from google.cloud.bigquery.schema import FieldElementType

        return FieldElementType

    def _make_one(self, *args):
        """Instantiate the class under test with positional ``args``."""
        return self._get_target_class()(*args)

    def test_constructor(self):
        element_type = self._make_one("DATETIME")
        self.assertEqual(element_type.element_type, "DATETIME")
        self.assertEqual(element_type._properties["type"], "DATETIME")

    def test_to_api_repr(self):
        element_type = self._make_one("DATETIME")
        self.assertEqual(element_type.to_api_repr(), {"type": "DATETIME"})

    def test_from_api_repr(self):
        api_repr = {"type": "DATETIME"}
        expected_element_type = self._make_one("DATETIME")
        parsed = self._get_target_class().from_api_repr(api_repr)
        # Check the returned type as well as the parsed value.
        self.assertIsInstance(parsed, self._get_target_class())
        self.assertEqual(expected_element_type.element_type, parsed.element_type)

    def test_from_api_repr_empty(self):
        # assertIsNone checks identity with None rather than equality.
        self.assertIsNone(self._get_target_class().from_api_repr({}))

    def test_from_api_repr_none(self):
        self.assertIsNone(self._get_target_class().from_api_repr(None))


# TODO: dedup with the same class in test_table.py.
class _SchemaBase(object):
def _verify_field(self, field, r_field):
Expand Down

0 comments on commit 8585747

Please sign in to comment.