Skip to content

Commit

Permalink
feat(bigquery): add support for hive partitioning options configuration (#9626)
Browse files Browse the repository at this point in the history

* feat(bigquery): add hive partitioning options to external config

* Mark ExternalConfig.options property as optional

* Support hive partitioning options in LoadJobConfig

* Mark hive partitioning class and properties as beta
  • Loading branch information
plamut authored and tswast committed Nov 19, 2019
1 parent 0b69ee0 commit 15d4bb6
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 1 deletion.
94 changes: 93 additions & 1 deletion bigquery/google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,76 @@ def from_api_repr(cls, resource):
_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)


class HivePartitioningOptions(object):
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self):
        # All state is kept in a dict that mirrors the API resource
        # representation, so (de)serialization is a plain copy.
        self._properties = {}

    @property
    def mode(self):
        """Optional[str]: When set, what mode of hive partitioning to use
        when reading data. Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a
        common prefix for all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    def to_api_repr(self):
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        # Deep copy so callers cannot mutate internal state via the result.
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        # Deep copy so later mutations of ``resource`` do not leak in.
        instance._properties = copy.deepcopy(resource)
        return instance


class ExternalConfig(object):
"""Description of an external data source.
Expand Down Expand Up @@ -571,7 +641,7 @@ def source_format(self):

@property
def options(self):
"""Dict[str, Any]: Source-specific options."""
"""Optional[Dict[str, Any]]: Source-specific options."""
return self._options

@property
Expand Down Expand Up @@ -601,6 +671,28 @@ def compression(self):
def compression(self, value):
self._properties["compression"] = value

@property
def hive_partitioning(self):
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
it configures hive partitioning support.
.. note::
**Experimental**. This feature is experimental and might change or
have limited support.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
"""
prop = self._properties.get("hivePartitioningOptions")
if prop is None:
return None
return HivePartitioningOptions.from_api_repr(prop)

@hive_partitioning.setter
def hive_partitioning(self, value):
prop = value.to_api_repr() if value is not None else None
self._properties["hivePartitioningOptions"] = prop

@property
def ignore_unknown_values(self):
"""bool: If :data:`True`, extra values that are not represented in the
Expand Down
28 changes: 28 additions & 0 deletions bigquery/google/cloud/bigquery/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.query import _query_param_from_api_repr
from google.cloud.bigquery.query import ArrayQueryParameter
Expand Down Expand Up @@ -1138,6 +1139,33 @@ def field_delimiter(self):
def field_delimiter(self, value):
self._set_sub_prop("fieldDelimiter", value)

@property
def hive_partitioning(self):
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
it configures hive partitioning support.
.. note::
**Experimental**. This feature is experimental and might change or
have limited support.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options
"""
prop = self._get_sub_prop("hivePartitioningOptions")
if prop is None:
return None
return HivePartitioningOptions.from_api_repr(prop)

@hive_partitioning.setter
def hive_partitioning(self, value):
if value is not None:
if isinstance(value, HivePartitioningOptions):
value = value.to_api_repr()
else:
raise TypeError("Expected a HivePartitioningOptions instance or None.")

self._set_sub_prop("hivePartitioningOptions", value)

@property
def ignore_unknown_values(self):
"""bool: Ignore extra values not represented in the table schema.
Expand Down
52 changes: 52 additions & 0 deletions bigquery/tests/unit/test_external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,58 @@ def test_to_api_repr_sheets(self):

self.assertEqual(got_resource, exp_resource)

def test_from_api_repr_hive_partitioning(self):
resource = _copy_and_update(
self.BASE_RESOURCE,
{
"sourceFormat": "FORMAT_FOO",
"hivePartitioningOptions": {
"sourceUriPrefix": "http://foo/bar",
"mode": "STRINGS",
},
},
)

ec = external_config.ExternalConfig.from_api_repr(resource)

self._verify_base(ec)
self.assertEqual(ec.source_format, "FORMAT_FOO")
self.assertIsInstance(
ec.hive_partitioning, external_config.HivePartitioningOptions
)
self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar")
self.assertEqual(ec.hive_partitioning.mode, "STRINGS")

# converting back to API representation should yield the same result
got_resource = ec.to_api_repr()
self.assertEqual(got_resource, resource)

del resource["hivePartitioningOptions"]
ec = external_config.ExternalConfig.from_api_repr(resource)
self.assertIsNone(ec.hive_partitioning)

got_resource = ec.to_api_repr()
self.assertEqual(got_resource, resource)

def test_to_api_repr_hive_partitioning(self):
hive_partitioning = external_config.HivePartitioningOptions()
hive_partitioning.source_uri_prefix = "http://foo/bar"
hive_partitioning.mode = "STRINGS"

ec = external_config.ExternalConfig("FORMAT_FOO")
ec.hive_partitioning = hive_partitioning

got_resource = ec.to_api_repr()

expected_resource = {
"sourceFormat": "FORMAT_FOO",
"hivePartitioningOptions": {
"sourceUriPrefix": "http://foo/bar",
"mode": "STRINGS",
},
}
self.assertEqual(got_resource, expected_resource)

def test_from_api_repr_csv(self):
resource = _copy_and_update(
self.BASE_RESOURCE,
Expand Down
40 changes: 40 additions & 0 deletions bigquery/tests/unit/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,46 @@ def test_field_delimiter_setter(self):
config.field_delimiter = field_delimiter
self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter)

def test_hive_partitioning_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.hive_partitioning)

def test_hive_partitioning_hit(self):
from google.cloud.bigquery.external_config import HivePartitioningOptions

config = self._get_target_class()()
config._properties["load"]["hivePartitioningOptions"] = {
"sourceUriPrefix": "http://foo/bar",
"mode": "STRINGS",
}
result = config.hive_partitioning
self.assertIsInstance(result, HivePartitioningOptions)
self.assertEqual(result.source_uri_prefix, "http://foo/bar")
self.assertEqual(result.mode, "STRINGS")

def test_hive_partitioning_setter(self):
from google.cloud.bigquery.external_config import HivePartitioningOptions

hive_partitioning = HivePartitioningOptions()
hive_partitioning.source_uri_prefix = "http://foo/bar"
hive_partitioning.mode = "AUTO"

config = self._get_target_class()()
config.hive_partitioning = hive_partitioning
self.assertEqual(
config._properties["load"]["hivePartitioningOptions"],
{"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"},
)

config.hive_partitioning = None
self.assertIsNone(config._properties["load"]["hivePartitioningOptions"])

def test_hive_partitioning_invalid_type(self):
config = self._get_target_class()()

with self.assertRaises(TypeError):
config.hive_partitioning = {"mode": "AUTO"}

def test_ignore_unknown_values_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.ignore_unknown_values)
Expand Down

0 comments on commit 15d4bb6

Please sign in to comment.