Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for AvroOptions #1630

Merged
merged 3 commits into from
Oct 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions google-cloud-bigquery/clirr-ignored-differences.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html -->
<differences>
<!--
Accepted API difference: FormatOptions.avro() is declared here as previously returning
com.google.cloud.bigquery.FormatOptions and now returning com.google.cloud.bigquery.AvroOptions.
NOTE(review): difference type 7006 is presumed to be Clirr's "method return type changed"
code; confirm against the Clirr difference-type table.
-->
<difference>
<differenceType>7006</differenceType>
<className>com/google/cloud/bigquery/FormatOptions</className>
<method>com.google.cloud.bigquery.FormatOptions avro()</method>
<to>com.google.cloud.bigquery.AvroOptions</to>
</difference>
</differences>
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import com.google.common.base.MoreObjects;
import java.util.Objects;

/**
* Google BigQuery options for AVRO format. This class wraps some properties of AVRO files used by
* BigQuery to parse external data.
*/
public final class AvroOptions extends FormatOptions {

private static final long serialVersionUID = 2293570529308612712L;

private final Boolean useAvroLogicalTypes;

public static final class Builder {

private Boolean useAvroLogicalTypes;

private Builder() {}

private Builder(AvroOptions avroOptions) {
this.useAvroLogicalTypes = avroOptions.useAvroLogicalTypes;
}

/**
* [Optional] Sets whether BigQuery should interpret logical types as the corresponding BigQuery
* data type (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER).
*/
public Builder setUseAvroLogicalTypes(boolean useAvroLogicalTypes) {
this.useAvroLogicalTypes = useAvroLogicalTypes;
return this;
}

/** Creates a {@code AvroOptions} object. */
public AvroOptions build() {
return new AvroOptions(this);
}
}

private AvroOptions(Builder builder) {
super(FormatOptions.AVRO);
this.useAvroLogicalTypes = builder.useAvroLogicalTypes;
}

/**
* Returns whether BigQuery should interpret logical types as the corresponding BigQuery data type
* (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER).
*/
public Boolean useAvroLogicalTypes() {
return useAvroLogicalTypes;
}

public Builder toBuilder() {
return new Builder(this);
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("type", getType())
.add("useAvroLogicalTypes", useAvroLogicalTypes)
.toString();
}

@Override
public int hashCode() {
return Objects.hash(getType(), useAvroLogicalTypes);
}

@Override
public boolean equals(Object obj) {
return obj == this
|| obj instanceof AvroOptions && Objects.equals(toPb(), ((AvroOptions) obj).toPb());
}

com.google.api.services.bigquery.model.AvroOptions toPb() {
com.google.api.services.bigquery.model.AvroOptions avroOptions =
new com.google.api.services.bigquery.model.AvroOptions();
avroOptions.setUseAvroLogicalTypes(useAvroLogicalTypes);
return avroOptions;
}

/** Returns a builder for a AvroOptions object. */
public static AvroOptions.Builder newBuilder() {
return new AvroOptions.Builder();
}

static AvroOptions fromPb(com.google.api.services.bigquery.model.AvroOptions avroOptions) {
Builder builder = newBuilder();
if (avroOptions.getUseAvroLogicalTypes() != null) {
builder.setUseAvroLogicalTypes(avroOptions.getUseAvroLogicalTypes());
}
return builder.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
if (getDecimalTargetTypes() != null) {
externalConfigurationPb.setDecimalTargetTypes(getDecimalTargetTypes());
}
if (getFormatOptions() != null && FormatOptions.AVRO.equals(getFormatOptions().getType())) {
externalConfigurationPb.setAvroOptions(((AvroOptions) getFormatOptions()).toPb());
}
if (getFormatOptions() != null && FormatOptions.CSV.equals(getFormatOptions().getType())) {
externalConfigurationPb.setCsvOptions(((CsvOptions) getFormatOptions()).toPb());
}
Expand Down Expand Up @@ -459,6 +462,9 @@ static ExternalTableDefinition fromPb(Table tablePb) {
builder.setConnectionId(externalDataConfiguration.getConnectionId());
}
builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues());
if (externalDataConfiguration.getAvroOptions() != null) {
builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions()));
}
if (externalDataConfiguration.getCsvOptions() != null) {
builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions()));
}
Expand Down Expand Up @@ -508,6 +514,9 @@ static ExternalTableDefinition fromExternalDataConfiguration(
if (externalDataConfiguration.getIgnoreUnknownValues() != null) {
builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues());
}
if (externalDataConfiguration.getAvroOptions() != null) {
builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions()));
}
if (externalDataConfiguration.getCsvOptions() != null) {
builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ public static FormatOptions datastoreBackup() {
}

/** Default options for AVRO format. */
public static FormatOptions avro() {
return new FormatOptions(AVRO);
public static AvroOptions avro() {
return AvroOptions.newBuilder().build();
}

/** Default options for BIGTABLE format. */
Expand Down Expand Up @@ -120,6 +120,8 @@ public static FormatOptions of(String format) {
checkArgument(!isNullOrEmpty(format), "Provided format is null or empty");
if (format.equals(CSV)) {
return csv();
} else if (format.equals(AVRO)) {
return avro();
stephaniewang526 marked this conversation as resolved.
Show resolved Hide resolved
} else if (format.equals(DATASTORE_BACKUP)) {
return datastoreBackup();
} else if (format.equals(GOOGLE_SHEETS)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

/** Unit tests for {@link AvroOptions}: builder behaviour and API model round trips. */
public class AvroOptionsTest {

  private static final Boolean USE_AVRO_LOGICAL_TYPES = true;
  private static final AvroOptions AVRO_OPTIONS =
      AvroOptions.newBuilder().setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES).build();

  @Test
  public void testToBuilder() {
    // An untouched toBuilder() round trip must yield an equal object.
    AvroOptions copy = AVRO_OPTIONS.toBuilder().build();
    assertSameOptions(AVRO_OPTIONS, copy);

    // Flipping the flag off is reflected by the getter.
    AvroOptions disabled = AVRO_OPTIONS.toBuilder().setUseAvroLogicalTypes(false).build();
    assertEquals(false, disabled.useAvroLogicalTypes());

    // Flipping it back on restores equality with the reference object.
    AvroOptions reEnabled = disabled.toBuilder().setUseAvroLogicalTypes(true).build();
    assertSameOptions(AVRO_OPTIONS, reEnabled);
  }

  @Test
  public void testBuilder() {
    assertEquals(FormatOptions.AVRO, AVRO_OPTIONS.getType());
    assertEquals(USE_AVRO_LOGICAL_TYPES, AVRO_OPTIONS.useAvroLogicalTypes());
  }

  @Test
  public void testToAndFromPb() {
    AvroOptions roundTripped = AvroOptions.fromPb(AVRO_OPTIONS.toPb());
    assertSameOptions(AVRO_OPTIONS, roundTripped);

    AvroOptions fresh =
        AvroOptions.newBuilder().setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES).build();
    AvroOptions freshRoundTripped = AvroOptions.fromPb(fresh.toPb());
    assertSameOptions(fresh, freshRoundTripped);
  }

  /** Asserts that two options are equal as objects and agree on the logical-types flag. */
  private void assertSameOptions(AvroOptions expected, AvroOptions actual) {
    assertEquals(expected, actual);
    assertEquals(expected.useAvroLogicalTypes(), actual.useAvroLogicalTypes());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public class ExternalTableDefinitionTest {
private static final String COMPRESSION = "GZIP";
private static final String CONNECTION_ID = "123456789";
private static final Boolean AUTODETECT = true;
private static final AvroOptions AVRO_OPTIONS = AvroOptions.newBuilder().build();
private static final CsvOptions CSV_OPTIONS = CsvOptions.newBuilder().build();
private static final HivePartitioningOptions HIVE_PARTITIONING_OPTIONS =
HivePartitioningOptions.newBuilder()
Expand All @@ -67,6 +68,9 @@ public class ExternalTableDefinitionTest {
.setHivePartitioningOptions(HIVE_PARTITIONING_OPTIONS)
.build();

private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION_AVRO =
ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, AVRO_OPTIONS).build();

@Test
public void testToBuilder() {
compareExternalTableDefinition(
Expand Down Expand Up @@ -109,6 +113,7 @@ public void testBuilder() {
assertEquals(TableDefinition.Type.EXTERNAL, EXTERNAL_TABLE_DEFINITION.getType());
assertEquals(COMPRESSION, EXTERNAL_TABLE_DEFINITION.getCompression());
assertEquals(CONNECTION_ID, EXTERNAL_TABLE_DEFINITION.getConnectionId());
assertEquals(AVRO_OPTIONS, EXTERNAL_TABLE_DEFINITION_AVRO.getFormatOptions());
assertEquals(CSV_OPTIONS, EXTERNAL_TABLE_DEFINITION.getFormatOptions());
assertEquals(IGNORE_UNKNOWN_VALUES, EXTERNAL_TABLE_DEFINITION.ignoreUnknownValues());
assertEquals(MAX_BAD_RECORDS, EXTERNAL_TABLE_DEFINITION.getMaxBadRecords());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class LoadJobConfigurationTest {
ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
private static final Schema TABLE_SCHEMA = Schema.of(FIELD_SCHEMA);
private static final Boolean AUTODETECT = true;
private static final Boolean USERAVROLOGICALTYPES = true;
private static final Boolean USE_AVRO_LOGICAL_TYPES = true;
private static final EncryptionConfiguration JOB_ENCRYPTION_CONFIGURATION =
EncryptionConfiguration.newBuilder().setKmsKeyName("KMS_KEY_1").build();
private static final TimePartitioning TIME_PARTITIONING = TimePartitioning.of(Type.DAY);
Expand Down Expand Up @@ -128,7 +128,7 @@ public class LoadJobConfigurationTest {
.setDestinationEncryptionConfiguration(JOB_ENCRYPTION_CONFIGURATION)
.setTimePartitioning(TIME_PARTITIONING)
.setClustering(CLUSTERING)
.setUseAvroLogicalTypes(USERAVROLOGICALTYPES)
.setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES)
.setLabels(LABELS)
.setJobTimeoutMs(TIMEOUT)
.setRangePartitioning(RANGE_PARTITIONING)
Expand Down