Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update bigquery dependency and add support for BYTES datatype #1045

Merged
merged 2 commits into from
Jun 10, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gcloud-java-bigquery/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-bigquery</artifactId>
<version>v2-rev270-1.21.0</version>
<version>v2-rev303-1.22.0</version>
<scope>compile</scope>
<exclusions>
<exclusion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public final class CsvOptions extends FormatOptions {
private final String encoding;
private final String fieldDelimiter;
private final String quote;
private final Integer skipLeadingRows;
private final Long skipLeadingRows;

public static final class Builder {

Expand All @@ -43,18 +43,27 @@ public static final class Builder {
private String encoding;
private String fieldDelimiter;
private String quote;
private Integer skipLeadingRows;
private Long skipLeadingRows;

private Builder() {}

/**
 * Creates a builder whose fields are initialized from the given {@code CsvOptions}. Each field
 * is copied as-is, so options left unset ({@code null}) in {@code csvOptions} stay unset here.
 */
private Builder(CsvOptions csvOptions) {
  // Boolean flags.
  allowJaggedRows = csvOptions.allowJaggedRows;
  allowQuotedNewLines = csvOptions.allowQuotedNewLines;
  // Textual settings.
  encoding = csvOptions.encoding;
  fieldDelimiter = csvOptions.fieldDelimiter;
  quote = csvOptions.quote;
  // Number of header rows to skip.
  skipLeadingRows = csvOptions.skipLeadingRows;
}

/**
* Sets whether BigQuery should accept rows that are missing trailing optional columns. If
* {@code true}, BigQuery treats missing trailing columns as null values. If {@code false},
* records with missing trailing columns are treated as bad records, and if there are too many
* bad records, an invalid error is returned in the job result. By default, rows with missing
* trailing columns are considered bad records.
*/
public Builder allowJaggedRows(Boolean allowJaggedRows) {
public Builder allowJaggedRows(boolean allowJaggedRows) {
this.allowJaggedRows = allowJaggedRows;
return this;
}
Expand All @@ -63,7 +72,7 @@ public Builder allowJaggedRows(Boolean allowJaggedRows) {
* Sets whether BigQuery should allow quoted data sections that contain newline characters in a
* CSV file. By default, quoted newlines are not allowed.
*/
public Builder allowQuotedNewLines(Boolean allowQuotedNewLines) {
public Builder allowQuotedNewLines(boolean allowQuotedNewLines) {
this.allowQuotedNewLines = allowQuotedNewLines;
return this;
}
Expand Down Expand Up @@ -104,7 +113,7 @@ public Builder fieldDelimiter(String fieldDelimiter) {
* string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split
* the data in its raw, binary state. The default value is a double-quote ('"'). If your data
* does not contain quoted sections, set the property value to an empty string. If your data
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(Boolean)}
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(boolean)}
* property to {@code true}.
*/
public Builder quote(String quote) {
Expand All @@ -117,7 +126,7 @@ public Builder quote(String quote) {
* data. The default value is 0. This property is useful if you have header rows in the file
* that should be skipped.
*/
public Builder skipLeadingRows(Integer skipLeadingRows) {
public Builder skipLeadingRows(long skipLeadingRows) {
this.skipLeadingRows = skipLeadingRows;
return this;
}
Expand Down Expand Up @@ -186,21 +195,15 @@ public String quote() {
* Returns the number of rows at the top of a CSV file that BigQuery will skip when reading the
* data.
*/
public Integer skipLeadingRows() {
public Long skipLeadingRows() {
return skipLeadingRows;
}

/**
* Returns a builder for the {@code CsvOptions} object.
*/
public Builder toBuilder() {
return new Builder()
.allowJaggedRows(allowJaggedRows)
.allowQuotedNewLines(allowQuotedNewLines)
.encoding(encoding)
.fieldDelimiter(fieldDelimiter)
.quote(quote)
.skipLeadingRows(skipLeadingRows);
return new Builder(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public static class Type implements Serializable {
private static final long serialVersionUID = 2841484762609576959L;

public enum Value {
STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
BYTES, STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
}

private final Value value;
Expand Down Expand Up @@ -108,6 +108,13 @@ public List<Field> fields() {
return fields;
}

/**
 * Creates a new {@code Type} whose value is {@link Value#BYTES}.
 */
public static Type bytes() {
  Value bytesValue = Value.BYTES;
  return new Type(bytesValue);
}

/**
* Returns a {@link Value#STRING} field value.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.collect.Lists;
import com.google.common.io.BaseEncoding;

import java.io.Serializable;
import java.util.List;
Expand Down Expand Up @@ -54,7 +55,7 @@ public FieldValue apply(Object pb) {
public enum Attribute {
/**
* A primitive field value. A {@code FieldValue} is primitive when the corresponding field has
* type {@link Field.Type#bool()}, {@link Field.Type#string()},
* type {@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()} or the value is set to {@code null}.
*/
Expand All @@ -80,7 +81,7 @@ public enum Attribute {
* Returns the attribute of this Field Value.
*
* @return {@link Attribute#PRIMITIVE} if the field is a primitive type
* ({@link Field.Type#bool()}, {@link Field.Type#string()},
* ({@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()}) or is {@code null}. Returns {@link Attribute#REPEATED} if
* the corresponding field has ({@link Field.Mode#REPEATED}) mode. Returns
Expand Down Expand Up @@ -108,8 +109,8 @@ public Object value() {

/**
* Returns this field's value as a {@link String}. This method should only be used if the
* corresponding field has primitive type ({@link Field.Type#bool()}, {@link Field.Type#string()},
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* corresponding field has primitive type ({@link Field.Type#bytes()}, {@link Field.Type#bool()},
* {@link Field.Type#string()}, {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
* {@link Field.Type#timestamp()}).
*
* @throws ClassCastException if the field is not a primitive type
Expand All @@ -121,6 +122,22 @@ public String stringValue() {
return (String) value;
}

/**
 * Returns this field's value as a byte array, obtained by base64-decoding the result of
 * {@link #stringValue()}. This method should only be used if the corresponding field has
 * primitive type ({@link Field.Type#bytes()}).
 *
 * @throws ClassCastException if the field is not a primitive type
 * @throws NullPointerException if {@link #isNull()} returns {@code true}
 * @throws IllegalStateException if the field value is not encoded in base64
 */
public byte[] bytesValue() {
  try {
    String encoded = stringValue();
    return BaseEncoding.base64().decode(encoded);
  } catch (IllegalArgumentException invalidBase64) {
    // Surface a malformed payload as an illegal state, keeping the decoder's error as the cause.
    throw new IllegalStateException(invalidBase64);
  }
}

/**
* Returns this field's value as a {@code long}. This method should only be used if the
* corresponding field has {@link Field.Type#integer()} type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ public final class InsertAllRequest implements Serializable {

/**
* A Google Big Query row to be inserted into a table. Each {@code RowToInsert} has an associated
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis.
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis. Note that
* data for fields of type {@link Field.Type#bytes()} must be provided as a base64-encoded
* string.
*
* <p>Example usage of creating a row to insert:
* <pre> {@code
Expand All @@ -58,8 +60,9 @@ public final class InsertAllRequest implements Serializable {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* RowToInsert row = new RowToInsert("rowId", rowContent);
* }</pre>
*
Expand Down Expand Up @@ -116,7 +119,8 @@ public boolean equals(Object obj) {
}

/**
* Creates a row to be inserted with associated id.
* Creates a row to be inserted with associated id. Note that data for fields of type
* {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* @param id id of the row, used to identify duplicates
* @param content the actual content of the row
Expand All @@ -126,7 +130,8 @@ public static RowToInsert of(String id, Map<String, Object> content) {
}

/**
* Creates a row to be inserted without associated id.
* Creates a row to be inserted without associated id. Note that data for fields of
* type {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* @param content the actual content of the row
*/
Expand Down Expand Up @@ -174,7 +179,8 @@ public Builder addRow(RowToInsert rowToInsert) {
}

/**
* Adds a row to be inserted with associated id.
* Adds a row to be inserted with associated id. Note that data for fields of type
* {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* <p>Example usage of adding a row with associated id:
* <pre> {@code
Expand All @@ -184,8 +190,9 @@ public Builder addRow(RowToInsert rowToInsert) {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* builder.addRow("rowId", rowContent);
* }</pre>
*/
Expand All @@ -195,7 +202,8 @@ public Builder addRow(String id, Map<String, Object> content) {
}

/**
* Adds a row to be inserted without an associated id.
* Adds a row to be inserted without an associated id. Note that data for fields of
* type {@link Field.Type#bytes()} must be provided as a base64-encoded string.
*
* <p>Example usage of adding a row without an associated id:
* <pre> {@code
Expand All @@ -205,8 +213,9 @@ public Builder addRow(String id, Map<String, Object> content) {
* recordContent.put("subfieldName1", "value");
* recordContent.put("subfieldName2", repeatedFieldValue);
* Map<String, Object> rowContent = new HashMap<String, Object>();
* rowContent.put("fieldName1", true);
* rowContent.put("fieldName2", recordContent);
* rowContent.put("booleanFieldName", true);
* rowContent.put("bytesFieldName", "DQ4KDQ==");
* rowContent.put("recordFieldName", recordContent);
* builder.addRow(rowContent);
* }</pre>
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.common.base.MoreObjects.ToStringHelper;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -97,12 +98,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
|| loadConfigurationPb.getQuote() != null
|| loadConfigurationPb.getSkipLeadingRows() != null) {
CsvOptions.Builder builder = CsvOptions.builder()
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
.encoding(loadConfigurationPb.getEncoding())
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
.quote(loadConfigurationPb.getQuote())
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
.quote(loadConfigurationPb.getQuote());
if (loadConfigurationPb.getAllowJaggedRows() != null) {
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
}
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
}
if (loadConfigurationPb.getSkipLeadingRows() != null) {
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
}
this.formatOptions = builder.build();
}
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
Expand Down Expand Up @@ -300,8 +307,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.encoding())
.setQuote(csvOptions.quote())
.setSkipLeadingRows(csvOptions.skipLeadingRows());
.setQuote(csvOptions.quote());
if (csvOptions.skipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
}
}
if (schema != null) {
loadConfigurationPb.setSchema(schema.toPb());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

import java.io.Serializable;
import java.util.List;
Expand Down Expand Up @@ -90,12 +91,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
|| loadConfigurationPb.getQuote() != null
|| loadConfigurationPb.getSkipLeadingRows() != null) {
CsvOptions.Builder builder = CsvOptions.builder()
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
.encoding(loadConfigurationPb.getEncoding())
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
.quote(loadConfigurationPb.getQuote())
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
.quote(loadConfigurationPb.getQuote());
if (loadConfigurationPb.getAllowJaggedRows() != null) {
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
}
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
}
if (loadConfigurationPb.getSkipLeadingRows() != null) {
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
}
this.formatOptions = builder.build();
}
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
Expand Down Expand Up @@ -271,8 +278,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.encoding())
.setQuote(csvOptions.quote())
.setSkipLeadingRows(csvOptions.skipLeadingRows());
.setQuote(csvOptions.quote());
if (csvOptions.skipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
}
}
if (schema != null) {
loadConfigurationPb.setSchema(schema.toPb());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class CsvOptionsTest {
private static final Charset ENCODING = StandardCharsets.UTF_8;
private static final String FIELD_DELIMITER = ",";
private static final String QUOTE = "\"";
private static final Integer SKIP_LEADING_ROWS = 42;
private static final long SKIP_LEADING_ROWS = 42L;
private static final CsvOptions CSV_OPTIONS = CsvOptions.builder()
.allowJaggedRows(ALLOW_JAGGED_ROWS)
.allowQuotedNewLines(ALLOW_QUOTED_NEWLINE)
Expand Down Expand Up @@ -65,7 +65,7 @@ public void testBuilder() {
assertEquals(ENCODING.name(), CSV_OPTIONS.encoding());
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.fieldDelimiter());
assertEquals(QUOTE, CSV_OPTIONS.quote());
assertEquals(SKIP_LEADING_ROWS, CSV_OPTIONS.skipLeadingRows());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.skipLeadingRows());
}

@Test
Expand Down
Loading