diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py
index c637d37d185c..ea6a42c60cac 100644
--- a/bigquery/google/cloud/bigquery/external_config.py
+++ b/bigquery/google/cloud/bigquery/external_config.py
@@ -564,6 +564,76 @@ def from_api_repr(cls, resource):
 _OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)
 
 
+class HivePartitioningOptions(object):
+    """[Beta] Options that configure hive partitioning.
+
+    .. note::
+        **Experimental**. This feature is experimental and might change or
+        have limited support.
+
+    See
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
+    """
+
+    def __init__(self):
+        self._properties = {}
+
+    @property
+    def mode(self):
+        """Optional[str]: When set, what mode of hive partitioning to use when reading data.
+
+        Two modes are supported: "AUTO" and "STRINGS".
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
+        """
+        return self._properties.get("mode")
+
+    @mode.setter
+    def mode(self, value):
+        self._properties["mode"] = value
+
+    @property
+    def source_uri_prefix(self):
+        """Optional[str]: When hive partition detection is requested, a common prefix for
+        all source URIs is required.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
+        """
+        return self._properties.get("sourceUriPrefix")
+
+    @source_uri_prefix.setter
+    def source_uri_prefix(self, value):
+        self._properties["sourceUriPrefix"] = value
+
+    def to_api_repr(self):
+        """Build an API representation of this object.
+
+        Returns:
+            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
+        """
+        return copy.deepcopy(self._properties)
+
+    @classmethod
+    def from_api_repr(cls, resource):
+        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
+        instance given its API representation.
+
+        Args:
+            resource (Dict[str, Any]):
+                Definition of a :class:`~.external_config.HivePartitioningOptions`
+                instance in the same representation as is returned from the
+                API.
+
+        Returns:
+            HivePartitioningOptions: Configuration parsed from ``resource``.
+        """
+        config = cls()
+        config._properties = copy.deepcopy(resource)
+        return config
+
+
 class ExternalConfig(object):
     """Description of an external data source.
 
@@ -592,7 +662,7 @@ def source_format(self):
 
     @property
     def options(self):
-        """Dict[str, Any]: Source-specific options."""
+        """Optional[Dict[str, Any]]: Source-specific options."""
         return self._options
 
     @property
@@ -624,6 +694,42 @@ def compression(self):
     def compression(self, value):
         self._properties["compression"] = value
 
+    @property
+    def hive_partitioning(self):
+        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
+        it configures hive partitioning support.
+
+        The getter returns a copy of the stored options, so changes made to
+        the returned object only take effect once it is assigned back to
+        this property.
+
+        .. note::
+            **Experimental**. This feature is experimental and might change or
+            have limited support.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
+
+        Raises:
+            TypeError:
+                If set to a value that is neither :data:`None` nor a
+                :class:`~.external_config.HivePartitioningOptions` instance.
+        """
+        prop = self._properties.get("hivePartitioningOptions")
+        if prop is None:
+            return None
+        return HivePartitioningOptions.from_api_repr(prop)
+
+    @hive_partitioning.setter
+    def hive_partitioning(self, value):
+        if value is not None:
+            if isinstance(value, HivePartitioningOptions):
+                value = value.to_api_repr()
+            else:
+                raise TypeError("Expected a HivePartitioningOptions instance or None.")
+
+        self._properties["hivePartitioningOptions"] = value
+
     @property
     def ignore_unknown_values(self):
         """bool: If :data:`True`, extra values that are not represented in the
diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
index a8d797f4bef5..e150cc61ef79 100644
--- a/bigquery/google/cloud/bigquery/job.py
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -29,6 +29,7 @@
 from google.cloud.bigquery.dataset import DatasetReference
 from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
 from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery.external_config import HivePartitioningOptions
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery.query import _query_param_from_api_repr
 from google.cloud.bigquery.query import ArrayQueryParameter
@@ -1138,6 +1139,42 @@ def field_delimiter(self):
     def field_delimiter(self, value):
         self._set_sub_prop("fieldDelimiter", value)
 
+    @property
+    def hive_partitioning(self):
+        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
+        it configures hive partitioning support.
+
+        The getter returns a copy of the stored options, so changes made to
+        the returned object only take effect once it is assigned back to
+        this property.
+
+        .. note::
+            **Experimental**. This feature is experimental and might change or
+            have limited support.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options
+
+        Raises:
+            TypeError:
+                If set to a value that is neither :data:`None` nor a
+                :class:`~.external_config.HivePartitioningOptions` instance.
+        """
+        prop = self._get_sub_prop("hivePartitioningOptions")
+        if prop is None:
+            return None
+        return HivePartitioningOptions.from_api_repr(prop)
+
+    @hive_partitioning.setter
+    def hive_partitioning(self, value):
+        if value is not None:
+            if isinstance(value, HivePartitioningOptions):
+                value = value.to_api_repr()
+            else:
+                raise TypeError("Expected a HivePartitioningOptions instance or None.")
+
+        self._set_sub_prop("hivePartitioningOptions", value)
+
     @property
     def ignore_unknown_values(self):
         """bool: Ignore extra values not represented in the table schema.
diff --git a/bigquery/tests/unit/test_external_config.py b/bigquery/tests/unit/test_external_config.py
index dab4391cbe04..6028d069bcbe 100644
--- a/bigquery/tests/unit/test_external_config.py
+++ b/bigquery/tests/unit/test_external_config.py
@@ -173,6 +173,64 @@ def test_to_api_repr_sheets(self):
 
         self.assertEqual(got_resource, exp_resource)
 
+    def test_from_api_repr_hive_partitioning(self):
+        resource = _copy_and_update(
+            self.BASE_RESOURCE,
+            {
+                "sourceFormat": "FORMAT_FOO",
+                "hivePartitioningOptions": {
+                    "sourceUriPrefix": "http://foo/bar",
+                    "mode": "STRINGS",
+                },
+            },
+        )
+
+        ec = external_config.ExternalConfig.from_api_repr(resource)
+
+        self._verify_base(ec)
+        self.assertEqual(ec.source_format, "FORMAT_FOO")
+        self.assertIsInstance(
+            ec.hive_partitioning, external_config.HivePartitioningOptions
+        )
+        self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar")
+        self.assertEqual(ec.hive_partitioning.mode, "STRINGS")
+
+        # converting back to API representation should yield the same result
+        got_resource = ec.to_api_repr()
+        self.assertEqual(got_resource, resource)
+
+        del resource["hivePartitioningOptions"]
+        ec = external_config.ExternalConfig.from_api_repr(resource)
+        self.assertIsNone(ec.hive_partitioning)
+
+        got_resource = ec.to_api_repr()
+        self.assertEqual(got_resource, resource)
+
+    def test_to_api_repr_hive_partitioning(self):
+        hive_partitioning = external_config.HivePartitioningOptions()
+        hive_partitioning.source_uri_prefix = "http://foo/bar"
+        hive_partitioning.mode = "STRINGS"
+
+        ec = external_config.ExternalConfig("FORMAT_FOO")
+        ec.hive_partitioning = hive_partitioning
+
+        got_resource = ec.to_api_repr()
+
+        expected_resource = {
+            "sourceFormat": "FORMAT_FOO",
+            "hivePartitioningOptions": {
+                "sourceUriPrefix": "http://foo/bar",
+                "mode": "STRINGS",
+            },
+        }
+        self.assertEqual(got_resource, expected_resource)
+
+    def test_hive_partitioning_setter_invalid_type(self):
+        ec = external_config.ExternalConfig("FORMAT_FOO")
+
+        with self.assertRaises(TypeError):
+            ec.hive_partitioning = {"mode": "AUTO"}
+
     def test_from_api_repr_csv(self):
         resource = _copy_and_update(
             self.BASE_RESOURCE,
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
index a2aeb5efbc4a..b7596e4db848 100644
--- a/bigquery/tests/unit/test_job.py
+++ b/bigquery/tests/unit/test_job.py
@@ -1448,6 +1448,46 @@ def test_field_delimiter_setter(self):
         config.field_delimiter = field_delimiter
         self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter)
 
+    def test_hive_partitioning_missing(self):
+        config = self._get_target_class()()
+        self.assertIsNone(config.hive_partitioning)
+
+    def test_hive_partitioning_hit(self):
+        from google.cloud.bigquery.external_config import HivePartitioningOptions
+
+        config = self._get_target_class()()
+        config._properties["load"]["hivePartitioningOptions"] = {
+            "sourceUriPrefix": "http://foo/bar",
+            "mode": "STRINGS",
+        }
+        result = config.hive_partitioning
+        self.assertIsInstance(result, HivePartitioningOptions)
+        self.assertEqual(result.source_uri_prefix, "http://foo/bar")
+        self.assertEqual(result.mode, "STRINGS")
+
+    def test_hive_partitioning_setter(self):
+        from google.cloud.bigquery.external_config import HivePartitioningOptions
+
+        hive_partitioning = HivePartitioningOptions()
+        hive_partitioning.source_uri_prefix = "http://foo/bar"
+        hive_partitioning.mode = "AUTO"
+
+        config = self._get_target_class()()
+        config.hive_partitioning = hive_partitioning
+        self.assertEqual(
+            config._properties["load"]["hivePartitioningOptions"],
+            {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"},
+        )
+
+        config.hive_partitioning = None
+        self.assertIsNone(config._properties["load"]["hivePartitioningOptions"])
+
+    def test_hive_partitioning_invalid_type(self):
+        config = self._get_target_class()()
+
+        with self.assertRaises(TypeError):
+            config.hive_partitioning = {"mode": "AUTO"}
+
     def test_ignore_unknown_values_missing(self):
         config = self._get_target_class()()
         self.assertIsNone(config.ignore_unknown_values)