From 7024e2d1fac0ca4cb2c25e7ff500c48857446694 Mon Sep 17 00:00:00 2001 From: stephwang Date: Thu, 30 Sep 2021 15:18:55 -0400 Subject: [PATCH 1/3] feat: add support for AvroOptions --- .../google/cloud/bigquery/AvroOptions.java | 112 ++++++++++++++++++ .../bigquery/ExternalTableDefinition.java | 9 ++ .../google/cloud/bigquery/FormatOptions.java | 2 + .../cloud/bigquery/AvroOptionsTest.java | 56 +++++++++ .../bigquery/ExternalTableDefinitionTest.java | 5 + .../bigquery/LoadJobConfigurationTest.java | 4 +- 6 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/AvroOptions.java create mode 100644 google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/AvroOptionsTest.java diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/AvroOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/AvroOptions.java new file mode 100644 index 000000000..dd5964f2d --- /dev/null +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/AvroOptions.java @@ -0,0 +1,112 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.bigquery; + +import com.google.common.base.MoreObjects; +import java.util.Objects; + +/** + * Google BigQuery options for AVRO format. This class wraps some properties of AVRO files used by + * BigQuery to parse external data. 
+ */ +public final class AvroOptions extends FormatOptions { + + private static final long serialVersionUID = 2293570529308612712L; + + private final Boolean useAvroLogicalTypes; + + public static final class Builder { + + private Boolean useAvroLogicalTypes; + + private Builder() {} + + private Builder(AvroOptions avroOptions) { + this.useAvroLogicalTypes = avroOptions.useAvroLogicalTypes; + } + + /** + * [Optional] Sets whether BigQuery should interpret logical types as the corresponding BigQuery + * data type (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER). + */ + public Builder setUseAvroLogicalTypes(boolean useAvroLogicalTypes) { + this.useAvroLogicalTypes = useAvroLogicalTypes; + return this; + } + + /** Creates a {@code AvroOptions} object. */ + public AvroOptions build() { + return new AvroOptions(this); + } + } + + private AvroOptions(Builder builder) { + super(FormatOptions.AVRO); + this.useAvroLogicalTypes = builder.useAvroLogicalTypes; + } + + /** + * Returns whether BigQuery should interpret logical types as the corresponding BigQuery data type + * (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER). 
+ */ + public Boolean useAvroLogicalTypes() { + return useAvroLogicalTypes; + } + + public Builder toBuilder() { + return new Builder(this); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("type", getType()) + .add("useAvroLogicalTypes", useAvroLogicalTypes) + .toString(); + } + + @Override + public int hashCode() { + return Objects.hash(getType(), useAvroLogicalTypes); + } + + @Override + public boolean equals(Object obj) { + return obj == this + || obj instanceof AvroOptions && Objects.equals(toPb(), ((AvroOptions) obj).toPb()); + } + + com.google.api.services.bigquery.model.AvroOptions toPb() { + com.google.api.services.bigquery.model.AvroOptions avroOptions = + new com.google.api.services.bigquery.model.AvroOptions(); + avroOptions.setUseAvroLogicalTypes(useAvroLogicalTypes); + return avroOptions; + } + + /** Returns a builder for a AvroOptions object. */ + public static AvroOptions.Builder newBuilder() { + return new AvroOptions.Builder(); + } + + static AvroOptions fromPb(com.google.api.services.bigquery.model.AvroOptions avroOptions) { + Builder builder = newBuilder(); + if (avroOptions.getUseAvroLogicalTypes() != null) { + builder.setUseAvroLogicalTypes(avroOptions.getUseAvroLogicalTypes()); + } + return builder.build(); + } +} diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java index 2b68a7ff2..6ca64a8d7 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java @@ -300,6 +300,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC if (getDecimalTargetTypes() != null) { externalConfigurationPb.setDecimalTargetTypes(getDecimalTargetTypes()); } + if (getFormatOptions() != null && 
FormatOptions.AVRO.equals(getFormatOptions().getType())) { + externalConfigurationPb.setAvroOptions(((AvroOptions) getFormatOptions()).toPb()); + } if (getFormatOptions() != null && FormatOptions.CSV.equals(getFormatOptions().getType())) { externalConfigurationPb.setCsvOptions(((CsvOptions) getFormatOptions()).toPb()); } @@ -459,6 +462,9 @@ static ExternalTableDefinition fromPb(Table tablePb) { builder.setConnectionId(externalDataConfiguration.getConnectionId()); } builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues()); + if (externalDataConfiguration.getAvroOptions() != null) { + builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions())); + } if (externalDataConfiguration.getCsvOptions() != null) { builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions())); } @@ -508,6 +514,9 @@ static ExternalTableDefinition fromExternalDataConfiguration( if (externalDataConfiguration.getIgnoreUnknownValues() != null) { builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues()); } + if (externalDataConfiguration.getAvroOptions() != null) { + builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions())); + } if (externalDataConfiguration.getCsvOptions() != null) { builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions())); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java index 92a57fc8f..b12918cd3 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java @@ -120,6 +120,8 @@ public static FormatOptions of(String format) { checkArgument(!isNullOrEmpty(format), "Provided format is null or empty"); if (format.equals(CSV)) { return csv(); + } else if (format.equals(AVRO)) { + 
return avro(); } else if (format.equals(DATASTORE_BACKUP)) { return datastoreBackup(); } else if (format.equals(GOOGLE_SHEETS)) { diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/AvroOptionsTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/AvroOptionsTest.java new file mode 100644 index 000000000..f40660fd7 --- /dev/null +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/AvroOptionsTest.java @@ -0,0 +1,56 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.bigquery; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class AvroOptionsTest { + + private static final Boolean USE_AVRO_LOGICAL_TYPES = true; + private static final AvroOptions AVRO_OPTIONS = + AvroOptions.newBuilder().setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES).build(); + + @Test + public void testToBuilder() { + compareAvroOptions(AVRO_OPTIONS, AVRO_OPTIONS.toBuilder().build()); + AvroOptions avroOptions = AVRO_OPTIONS.toBuilder().setUseAvroLogicalTypes(false).build(); + assertEquals(false, avroOptions.useAvroLogicalTypes()); + avroOptions = avroOptions.toBuilder().setUseAvroLogicalTypes(true).build(); + compareAvroOptions(AVRO_OPTIONS, avroOptions); + } + + @Test + public void testBuilder() { + assertEquals(FormatOptions.AVRO, AVRO_OPTIONS.getType()); + assertEquals(USE_AVRO_LOGICAL_TYPES, AVRO_OPTIONS.useAvroLogicalTypes()); + } + + @Test + public void testToAndFromPb() { + compareAvroOptions(AVRO_OPTIONS, AvroOptions.fromPb(AVRO_OPTIONS.toPb())); + AvroOptions avroOptions = + AvroOptions.newBuilder().setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES).build(); + compareAvroOptions(avroOptions, AvroOptions.fromPb(avroOptions.toPb())); + } + + private void compareAvroOptions(AvroOptions expected, AvroOptions value) { + assertEquals(expected, value); + assertEquals(expected.useAvroLogicalTypes(), value.useAvroLogicalTypes()); + } +} diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java index 38dcd2714..b7b9531e5 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java @@ -50,6 +50,7 @@ public class ExternalTableDefinitionTest { private static final String COMPRESSION = "GZIP"; private static 
final String CONNECTION_ID = "123456789"; private static final Boolean AUTODETECT = true; + private static final AvroOptions AVRO_OPTIONS = AvroOptions.newBuilder().build(); private static final CsvOptions CSV_OPTIONS = CsvOptions.newBuilder().build(); private static final HivePartitioningOptions HIVE_PARTITIONING_OPTIONS = HivePartitioningOptions.newBuilder() @@ -67,6 +68,9 @@ public class ExternalTableDefinitionTest { .setHivePartitioningOptions(HIVE_PARTITIONING_OPTIONS) .build(); + private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION_AVRO = + ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, AVRO_OPTIONS).build(); + @Test public void testToBuilder() { compareExternalTableDefinition( @@ -109,6 +113,7 @@ public void testBuilder() { assertEquals(TableDefinition.Type.EXTERNAL, EXTERNAL_TABLE_DEFINITION.getType()); assertEquals(COMPRESSION, EXTERNAL_TABLE_DEFINITION.getCompression()); assertEquals(CONNECTION_ID, EXTERNAL_TABLE_DEFINITION.getConnectionId()); + assertEquals(AVRO_OPTIONS, EXTERNAL_TABLE_DEFINITION_AVRO.getFormatOptions()); assertEquals(CSV_OPTIONS, EXTERNAL_TABLE_DEFINITION.getFormatOptions()); assertEquals(IGNORE_UNKNOWN_VALUES, EXTERNAL_TABLE_DEFINITION.ignoreUnknownValues()); assertEquals(MAX_BAD_RECORDS, EXTERNAL_TABLE_DEFINITION.getMaxBadRecords()); diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java index a2f164f8a..deed2f11b 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java @@ -56,7 +56,7 @@ public class LoadJobConfigurationTest { ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION); private static final Schema TABLE_SCHEMA = Schema.of(FIELD_SCHEMA); private static final Boolean AUTODETECT = true; - private static 
final Boolean USERAVROLOGICALTYPES = true; + private static final Boolean USE_AVRO_LOGICAL_TYPES = true; private static final EncryptionConfiguration JOB_ENCRYPTION_CONFIGURATION = EncryptionConfiguration.newBuilder().setKmsKeyName("KMS_KEY_1").build(); private static final TimePartitioning TIME_PARTITIONING = TimePartitioning.of(Type.DAY); @@ -128,7 +128,7 @@ public class LoadJobConfigurationTest { .setDestinationEncryptionConfiguration(JOB_ENCRYPTION_CONFIGURATION) .setTimePartitioning(TIME_PARTITIONING) .setClustering(CLUSTERING) - .setUseAvroLogicalTypes(USERAVROLOGICALTYPES) + .setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES) .setLabels(LABELS) .setJobTimeoutMs(TIMEOUT) .setRangePartitioning(RANGE_PARTITIONING) From d00cf4ea817d9c9cd5e17ecd678110a0c560f94e Mon Sep 17 00:00:00 2001 From: stephwang Date: Fri, 1 Oct 2021 14:09:33 -0400 Subject: [PATCH 2/3] update avro() function in FormatOptions --- .../main/java/com/google/cloud/bigquery/FormatOptions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java index b12918cd3..c4f411175 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/FormatOptions.java @@ -91,8 +91,8 @@ public static FormatOptions datastoreBackup() { } /** Default options for AVRO format. */ - public static FormatOptions avro() { - return new FormatOptions(AVRO); + public static AvroOptions avro() { + return AvroOptions.newBuilder().build(); } /** Default options for BIGTABLE format. 
*/ From f06e29d124262108740f33d8b7df05d131f99ffd Mon Sep 17 00:00:00 2001 From: stephwang Date: Fri, 1 Oct 2021 14:20:10 -0400 Subject: [PATCH 3/3] fix clirr error --- google-cloud-bigquery/clirr-ignored-differences.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 google-cloud-bigquery/clirr-ignored-differences.xml diff --git a/google-cloud-bigquery/clirr-ignored-differences.xml b/google-cloud-bigquery/clirr-ignored-differences.xml new file mode 100644 index 000000000..5dba6ab7a --- /dev/null +++ b/google-cloud-bigquery/clirr-ignored-differences.xml @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html --> +<differences> + <difference> + <differenceType>7006</differenceType> + <className>com/google/cloud/bigquery/FormatOptions</className> + <method>com.google.cloud.bigquery.FormatOptions avro()</method> + <to>com.google.cloud.bigquery.AvroOptions</to> + </difference> +</differences>