Skip to content

Commit

Permalink
feat: support load job option ColumnNameCharacterMap (#1952)
Browse files Browse the repository at this point in the history
* feat: support load job option ColumnNameCharacterMap

* add unit test
  • Loading branch information
Linchin authored Jun 14, 2024
1 parent 5d10f1e commit 7e522ee
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
41 changes: 41 additions & 0 deletions google/cloud/bigquery/job/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,26 @@
from google.cloud.bigquery.query import ConnectionProperty


class ColumnNameCharacterMap:
    """Character maps that may be applied to column names.

    Each attribute holds the string value of the corresponding API constant.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap
    """

    COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED"
    """No column name character map was specified."""

    STRICT = "STRICT"
    """Flexible column names are supported; invalid column names are rejected."""

    V1 = "V1"
    """Only alphanumeric characters and underscores are supported, and a name
    must begin with a letter or an underscore. Invalid column names are
    normalized."""

    V2 = "V2"
    """Flexible column names are supported. Invalid column names are normalized."""


class LoadJobConfig(_JobConfig):
"""Configuration options for load jobs.
Expand Down Expand Up @@ -597,6 +617,27 @@ def parquet_options(self, value):
else:
self._del_sub_prop("parquetOptions")

@property
def column_name_character_map(self) -> str:
    """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]:
    The character map applied to column names for CSV/Parquet loads.

    The default is documented as STRICT and may be overridden by Project
    Config Service. Combining this option with a load format that does not
    support it results in an error.

    When ``columnNameCharacterMap`` is not present in this config's load
    properties, ``COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED`` is returned.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map
    """
    # Fallback handed to the lookup for configs that never set the option.
    fallback = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    return self._get_sub_prop("columnNameCharacterMap", fallback)

@column_name_character_map.setter
def column_name_character_map(self, value: Optional[str]):
    """Store the column name character map in the load properties.

    ``None`` is recorded as ``COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED``
    rather than removing the ``columnNameCharacterMap`` entry.
    """
    character_map = (
        ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
        if value is None
        else value
    )
    self._set_sub_prop("columnNameCharacterMap", character_map)


class LoadJob(_AsyncJob):
"""Asynchronous job for loading data into a table.
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/job/test_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,3 +843,42 @@ def test_parquet_options_setter_clearing(self):

config.parquet_options = None
self.assertNotIn("parquetOptions", config._properties["load"])

def test_column_name_character_map_missing(self):
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    # A freshly built config has no "columnNameCharacterMap" entry, so the
    # getter is expected to fall back to the UNSPECIFIED constant.
    config = self._get_target_class()()
    expected = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    self.assertEqual(config.column_name_character_map, expected)

def test_column_name_character_map_hit(self):
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    config = self._get_target_class()()
    # Seed the raw API representation directly, bypassing the setter, so
    # that only the getter is under test.
    config._properties["load"]["columnNameCharacterMap"] = "STRICT"

    observed = config.column_name_character_map
    self.assertEqual(observed, ColumnNameCharacterMap.STRICT)

def test_column_name_character_map_setter(self):
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    config = self._get_target_class()()
    config.column_name_character_map = "V1"

    # The setter must write the value into the raw "load" properties.
    stored = config._properties["load"]["columnNameCharacterMap"]
    self.assertEqual(stored, ColumnNameCharacterMap.V1)

def test_column_name_character_map_none(self):
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    config = self._get_target_class()()
    config.column_name_character_map = None

    # Assigning None is expected to store the UNSPECIFIED constant instead
    # of leaving the entry absent.
    stored = config._properties["load"]["columnNameCharacterMap"]
    expected = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    self.assertEqual(stored, expected)

0 comments on commit 7e522ee

Please sign in to comment.