Skip to content

Commit

Permalink
feat: add additional parameters to CsvOptions and ParquetOptions (#3370)
Browse files Browse the repository at this point in the history
* feat: add additional parameters to CsvOptions and ParquetOptions

* fix lint

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Addressed review comments

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
PhongChuong and gcf-owl-bot[bot] authored Jun 28, 2024
1 parent ceb270c commit 34f16fb
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@
*/
public final class CsvOptions extends FormatOptions {

private static final long serialVersionUID = 2193570529308612708L;
private static final long serialVersionUID = 2193570529308612709L;

private final Boolean allowJaggedRows;
private final Boolean allowQuotedNewLines;
private final String encoding;
private final String fieldDelimiter;
private final String nullMarker;
private final String quote;
private final Long skipLeadingRows;
private final Boolean preserveAsciiControlCharacters;
Expand All @@ -42,6 +43,7 @@ public static final class Builder {
private Boolean allowQuotedNewLines;
private String encoding;
private String fieldDelimiter;
private String nullMarker;
private String quote;
private Long skipLeadingRows;
private Boolean preserveAsciiControlCharacters;
Expand All @@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
this.allowQuotedNewLines = csvOptions.allowQuotedNewLines;
this.encoding = csvOptions.encoding;
this.fieldDelimiter = csvOptions.fieldDelimiter;
this.nullMarker = csvOptions.nullMarker;
this.quote = csvOptions.quote;
this.skipLeadingRows = csvOptions.skipLeadingRows;
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
Expand Down Expand Up @@ -110,6 +113,18 @@ public Builder setFieldDelimiter(String fieldDelimiter) {
return this;
}

/**
* [Optional] Specifies a string that represents a null value in a CSV file. For example, if you
* specify {@code "\N"}, BigQuery interprets {@code "\N"} as a null value when querying a CSV
* file. The default value is the empty string. If you set this property to a custom value,
* BigQuery throws an error if an empty string is present for all data types except for STRING
* and BYTE. For STRING and BYTE columns, BigQuery interprets the empty string as an empty value.
*
* @param nullMarker the string to treat as a null value
* @return this builder, so calls can be chained
*/
public Builder setNullMarker(String nullMarker) {
this.nullMarker = nullMarker;
return this;
}

/**
* Sets the value that is used to quote data sections in a CSV file. BigQuery converts the
* string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split
Expand Down Expand Up @@ -154,6 +169,7 @@ private CsvOptions(Builder builder) {
this.allowQuotedNewLines = builder.allowQuotedNewLines;
this.encoding = builder.encoding;
this.fieldDelimiter = builder.fieldDelimiter;
this.nullMarker = builder.nullMarker;
this.quote = builder.quote;
this.skipLeadingRows = builder.skipLeadingRows;
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
Expand Down Expand Up @@ -192,6 +208,11 @@ public String getFieldDelimiter() {
return fieldDelimiter;
}

/**
 * Returns the marker string that stands for a null value in a CSV file.
 *
 * @return the null marker held by this object
 */
public String getNullMarker() {
  return this.nullMarker;
}

/** Returns the value that is used to quote data sections in a CSV file. */
public String getQuote() {
return quote;
Expand Down Expand Up @@ -226,6 +247,7 @@ public String toString() {
.add("allowQuotedNewLines", allowQuotedNewLines)
.add("encoding", encoding)
.add("fieldDelimiter", fieldDelimiter)
.add("nullMarker", nullMarker)
.add("quote", quote)
.add("skipLeadingRows", skipLeadingRows)
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
Expand All @@ -240,6 +262,7 @@ public int hashCode() {
allowQuotedNewLines,
encoding,
fieldDelimiter,
nullMarker,
quote,
skipLeadingRows,
preserveAsciiControlCharacters);
Expand All @@ -258,6 +281,7 @@ com.google.api.services.bigquery.model.CsvOptions toPb() {
csvOptions.setAllowQuotedNewlines(allowQuotedNewLines);
csvOptions.setEncoding(encoding);
csvOptions.setFieldDelimiter(fieldDelimiter);
csvOptions.setNullMarker(nullMarker);
csvOptions.setQuote(quote);
csvOptions.setSkipLeadingRows(skipLeadingRows);
csvOptions.setPreserveAsciiControlCharacters(preserveAsciiControlCharacters);
Expand All @@ -283,6 +307,9 @@ static CsvOptions fromPb(com.google.api.services.bigquery.model.CsvOptions csvOp
if (csvOptions.getFieldDelimiter() != null) {
builder.setFieldDelimiter(csvOptions.getFieldDelimiter());
}
if (csvOptions.getNullMarker() != null) {
builder.setNullMarker(csvOptions.getNullMarker());
}
if (csvOptions.getQuote() != null) {
builder.setQuote(csvOptions.getQuote());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@

public class ParquetOptions extends FormatOptions {

private static final long serialVersionUID = 1992L;
private static final long serialVersionUID = 1993L;

private final Boolean enableListInference;
private final Boolean enumAsString;
private final String mapTargetType;

public Boolean getEnableListInference() {
return enableListInference;
Expand All @@ -34,16 +35,23 @@ public Boolean getEnumAsString() {
return enumAsString;
}

/**
 * Returns the setting that indicates how a Parquet map is represented.
 *
 * @return the map target type held by this object; {@code null} when the builder never set it
 */
public String getMapTargetType() {
  return this.mapTargetType;
}

/** A builder for {@code ParquetOptions} objects. */
public static final class Builder {
private Boolean enableListInference;
private Boolean enumAsString;
private String mapTargetType;

private Builder() {}

private Builder(ParquetOptions parquetOptions) {
this.enableListInference = parquetOptions.enableListInference;
this.enumAsString = parquetOptions.enumAsString;
this.mapTargetType = parquetOptions.mapTargetType;
}

public Builder setEnableListInference(Boolean enableListInference) {
Expand All @@ -56,6 +64,17 @@ public Builder setEnumAsString(Boolean enumAsString) {
return this;
}

/**
* [Optional] Indicates how to represent a Parquet map if present.
*
* @param mapTargetType the map target type to store, for example {@code "ARRAY_OF_STRUCT"}
* @return this builder, so calls can be chained
* @see <a href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#maptargettype">
* MapTargetType</a>
*/
public Builder setMapTargetType(String mapTargetType) {
this.mapTargetType = mapTargetType;
return this;
}

/**
* Creates a {@code ParquetOptions} object from the values currently held by this builder.
*
* @return a new {@code ParquetOptions} constructed from this builder
*/
public ParquetOptions build() {
return new ParquetOptions(this);
}
Expand All @@ -69,19 +88,21 @@ public Builder toBuilder() {
super(FormatOptions.PARQUET);
enableListInference = builder.enableListInference;
enumAsString = builder.enumAsString;
mapTargetType = builder.mapTargetType;
}

/**
 * Returns a string representation listing the {@code enableListInference}, {@code enumAsString}
 * and {@code mapTargetType} values of this object, in that order.
 */
@Override
public String toString() {
  MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
  helper.add("enableListInference", enableListInference);
  helper.add("enumAsString", enumAsString);
  helper.add("mapTargetType", mapTargetType);
  return helper.toString();
}

@Override
public final int hashCode() {
return Objects.hash(enableListInference, enumAsString);
return Objects.hash(enableListInference, enumAsString, mapTargetType);
}

@Override
Expand All @@ -93,7 +114,9 @@ public final boolean equals(Object obj) {
return false;
}
ParquetOptions other = (ParquetOptions) obj;
return enableListInference == other.enableListInference && enumAsString == other.enumAsString;
return enableListInference == other.enableListInference
&& enumAsString == other.enumAsString
&& Objects.equals(mapTargetType, ((ParquetOptions) obj).getMapTargetType());
}

/** Returns a builder for a {@link ParquetOptions} object. */
Expand All @@ -110,6 +133,9 @@ static ParquetOptions fromPb(
if (parquetOptions.getEnumAsString() != null) {
builder.setEnumAsString(parquetOptions.getEnumAsString());
}
if (parquetOptions.getMapTargetType() != null) {
builder.setMapTargetType(parquetOptions.getMapTargetType());
}
return builder.build();
}

Expand All @@ -122,6 +148,9 @@ com.google.api.services.bigquery.model.ParquetOptions toPb() {
if (enumAsString != null) {
parquetOptions.setEnumAsString(enumAsString);
}
if (mapTargetType != null) {
parquetOptions.setMapTargetType(mapTargetType);
}
return parquetOptions;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class CsvOptionsTest {
private static final Boolean ALLOW_QUOTED_NEWLINE = true;
private static final Charset ENCODING = StandardCharsets.UTF_8;
private static final String FIELD_DELIMITER = ",";
private static final String NULL_MARKER = "\\N";
private static final String QUOTE = "\"";
private static final long SKIP_LEADING_ROWS = 42L;

Expand All @@ -38,6 +39,7 @@ public class CsvOptionsTest {
.setAllowQuotedNewLines(ALLOW_QUOTED_NEWLINE)
.setEncoding(ENCODING)
.setFieldDelimiter(FIELD_DELIMITER)
.setNullMarker(NULL_MARKER)
.setQuote(QUOTE)
.setSkipLeadingRows(SKIP_LEADING_ROWS)
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
Expand Down Expand Up @@ -65,6 +67,7 @@ public void testBuilder() {
assertEquals(ALLOW_QUOTED_NEWLINE, CSV_OPTIONS.allowQuotedNewLines());
assertEquals(ENCODING.name(), CSV_OPTIONS.getEncoding());
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
assertEquals(NULL_MARKER, CSV_OPTIONS.getNullMarker());
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
assertEquals(
Expand All @@ -84,6 +87,7 @@ private void compareCsvOptions(CsvOptions expected, CsvOptions value) {
assertEquals(expected.allowQuotedNewLines(), value.allowQuotedNewLines());
assertEquals(expected.getEncoding(), value.getEncoding());
assertEquals(expected.getFieldDelimiter(), value.getFieldDelimiter());
assertEquals(expected.getNullMarker(), value.getNullMarker());
assertEquals(expected.getQuote(), value.getQuote());
assertEquals(expected.getSkipLeadingRows(), value.getSkipLeadingRows());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,23 @@
public class ParquetOptionsTest {

private static final ParquetOptions OPTIONS =
ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build();
ParquetOptions.newBuilder()
.setEnableListInference(true)
.setEnumAsString(true)
.setMapTargetType("ARRAY_OF_STRUCT")
.build();

@Test
public void testToBuilder() {
compareParquetOptions(OPTIONS, OPTIONS.toBuilder().build());
ParquetOptions parquetOptions = OPTIONS.toBuilder().setEnableListInference(true).build();
assertEquals(true, parquetOptions.getEnableListInference());
parquetOptions = parquetOptions.toBuilder().setEnumAsString(true).build();
parquetOptions =
parquetOptions
.toBuilder()
.setEnumAsString(true)
.setMapTargetType("ARRAY_OF_STRUCT")
.build();
compareParquetOptions(OPTIONS, parquetOptions);
}

Expand All @@ -47,6 +56,7 @@ public void testBuilder() {
assertEquals(FormatOptions.PARQUET, OPTIONS.getType());
assertEquals(true, OPTIONS.getEnableListInference());
assertEquals(true, OPTIONS.getEnumAsString());
assertEquals("ARRAY_OF_STRUCT", OPTIONS.getMapTargetType());
}

@Test
Expand Down

0 comments on commit 34f16fb

Please sign in to comment.