From 848245e10efcb6a39b0de435c621e2ccb4e8a83d Mon Sep 17 00:00:00 2001 From: Shahin Date: Sun, 11 Mar 2018 17:28:09 -0700 Subject: [PATCH] Added a snippet to show how to read a newline-delimited-json file and store it in a Table (#2974) --- .../com/google/cloud/bigquery/BigQuery.java | 87 ++++++++++--------- .../bigquery/snippets/BigQuerySnippets.java | 57 ++++++++++-- .../bigquery/snippets/ITBigQuerySnippets.java | 20 +++++ 3 files changed, 114 insertions(+), 50 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java index 9d69342f754a..7f216910f8d9 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java @@ -522,7 +522,7 @@ public int hashCode() { * } catch (BigQueryException e) { * // the dataset was not created * } - * } + * } * * @throws BigQueryException upon failure */ @@ -538,7 +538,7 @@ public int hashCode() { * String fieldName = "string_field"; * TableId tableId = TableId.of(datasetName, tableName); * // Table field definition - * Field field = Field.of(fieldName, Field.Type.string()); + * Field field = Field.of(fieldName, LegacySQLTypeName.STRING); * // Table schema definition * Schema schema = Schema.of(field); * TableDefinition tableDefinition = StandardTableDefinition.of(schema); @@ -553,6 +553,32 @@ public int hashCode() { /** * Creates a new job. * + *

Example of loading a newline-delimited JSON file with textual fields from GCS to a table. + *

 {@code
+   * String datasetName = "my_dataset_name";
+   * String tableName = "my_table_name";
+   * String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
+   * TableId tableId = TableId.of(datasetName, tableName);
+   * // Table field definition
+   * Field[] fields = new Field[] {
+   *     Field.of("name", LegacySQLTypeName.STRING),
+   *     Field.of("post_abbr", LegacySQLTypeName.STRING)
+   * };
+   * // Table schema definition
+   * Schema schema = Schema.of(fields);
+   * LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
+   *     .setFormatOptions(FormatOptions.json())
+   *     .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
+   *     .setSchema(schema)
+   *     .build();
+   * // Load the table
+   * Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
+   * remoteLoadJob = remoteLoadJob.waitFor();
+   * // Check the table
+   * System.out.println("State: " + remoteLoadJob.getStatus().getState());
+   * return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
+   * }
+ * *

Example of creating a query job. *

 {@code
    * String query = "SELECT field FROM my_dataset_name.my_table_name";
@@ -861,8 +887,7 @@ public int hashCode() {
    * Lists the table's rows.
    *
    * 

Example of listing table rows, specifying the page size. - * - *

{@code
+   * 
 {@code
    * String datasetName = "my_dataset_name";
    * String tableName = "my_table_name";
    * // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
@@ -882,8 +907,7 @@ public int hashCode() {
    * Lists the table's rows.
    *
    * 

Example of listing table rows, specifying the page size. - * - *

{@code
+   * 
 {@code
    * String datasetName = "my_dataset_name";
    * String tableName = "my_table_name";
    * TableId tableIdObject = TableId.of(datasetName, tableName);
@@ -891,7 +915,7 @@ public int hashCode() {
    * // simply omit the option.
    * TableResult tableData =
    *     bigquery.listTableData(tableIdObject, TableDataListOption.pageSize(100));
-   * for (FieldValueList row : rowIterator.hasNext()) {
+   * for (FieldValueList row : tableData.iterateAll()) {
    *   // do something with the row
    * }
    * }
@@ -904,17 +928,16 @@ public int hashCode() { * Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the * {@link FieldValueList} iterated over. * - *

Example of listing table rows. - * - *

{@code
+   * 

Example of listing table rows with schema. + *

 {@code
    * String datasetName = "my_dataset_name";
    * String tableName = "my_table_name";
    * Schema schema = ...;
-   * String field = "my_field";
+   * String field = "field";
    * TableResult tableData =
    *     bigquery.listTableData(datasetName, tableName, schema);
    * for (FieldValueList row : tableData.iterateAll()) {
-   *   row.get(field)
+   *   row.get(field);
    * }
    * }
* @@ -927,9 +950,8 @@ TableResult listTableData( * Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the * {@link FieldValueList} iterated over. * - *

Example of listing table rows. - * - *

{@code
+   * 

Example of listing table rows with schema. + *

 {@code
    * Schema schema =
    *     Schema.of(
    *         Field.of("word", LegacySQLTypeName.STRING),
@@ -1047,28 +1069,21 @@ TableResult listTableData(
    * queries. Since dry-run queries are not actually executed, there's no way to retrieve results.
    *
    * 

Example of running a query. - * - *

{@code
-   * String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare`";
-   * QueryJobConfiguration queryConfig = QueryJobConfiguration.of(query);
-   *
-   * // To run the legacy syntax queries use the following code instead:
-   * //   String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"
-   * //   QueryJobConfiguration queryConfig =
-   * //       QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
-   *
+   * 
 {@code
+   * String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]";
+   * QueryJobConfiguration queryConfig =
+   *     QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
    * for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
    *   // do something with the data
    * }
    * }
* *

Example of running a query with query parameters. - * - *

{@code
-   * String query =
-   *     "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > ?";
+   * 
 {@code
+   * String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount";
+   * // Note, standard SQL is required to use query parameters. Legacy SQL will not work.
    * QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
-   *     .addPositionalParameter(QueryParameterValue.int64(5))
+   *     .addNamedParameter("wordCount", QueryParameterValue.int64(5))
    *     .build();
    * for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
    *   // do something with the data
@@ -1092,18 +1107,6 @@ TableResult query(QueryJobConfiguration configuration, JobOption... options)
    * 

See {@link #query(QueryJobConfiguration, JobOption...)} for examples on populating a {@link * QueryJobConfiguration}. * - *

The recommended way to create a randomly generated JobId is the following: - * - *

{@code
-   * JobId jobId = JobId.of();
-   * }
- * - * For a user specified job id with an optional prefix use the following: - * - *
{@code
-   * JobId jobId = JobId.of("my_prefix-my_unique_job_id");
-   * }
- * * @throws BigQueryException upon failure * @throws InterruptedException if the current thread gets interrupted while waiting for the query * to complete diff --git a/google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java b/google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java index d6701804a6f9..ea95c59bbcf9 100644 --- a/google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java +++ b/google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java @@ -25,6 +25,8 @@ import com.google.api.client.util.Charsets; import com.google.api.gax.paging.Page; import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.JobInfo.CreateDisposition; +import com.google.cloud.bigquery.LoadJobConfiguration; import com.google.cloud.bigquery.TableResult; import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption; import com.google.cloud.bigquery.BigQuery.DatasetListOption; @@ -378,6 +380,38 @@ public long writeFileToTable(String datasetName, String tableName, Path csvPath) // [END writeFileToTable] } + /** + * Example of loading a newline-delimited-json file with textual fields from GCS to a table. 
+ */ + // [TARGET create(JobInfo, JobOption...)] + // [VARIABLE "my_dataset_name"] + // [VARIABLE "my_table_name"] + public Long writeRemoteFileToTable(String datasetName, String tableName) + throws InterruptedException { + // [START bigquery_load_table_gcs_json] + String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"; + TableId tableId = TableId.of(datasetName, tableName); + // Table field definition + Field[] fields = new Field[] { + Field.of("name", LegacySQLTypeName.STRING), + Field.of("post_abbr", LegacySQLTypeName.STRING) + }; + // Table schema definition + Schema schema = Schema.of(fields); + LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri) + .setFormatOptions(FormatOptions.json()) + .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) + .setSchema(schema) + .build(); + // Load the table + Job remoteLoadJob = bigquery.create(JobInfo.of(configuration)); + remoteLoadJob = remoteLoadJob.waitFor(); + // Check the table + System.out.println("State: " + remoteLoadJob.getStatus().getState()); + return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows(); + // [END bigquery_load_table_gcs_json] + } + /** * Example of inserting rows into a table without running a load job. */ @@ -470,7 +504,9 @@ public TableResult listTableDataFromId(String datasetName, String tableName) { return tableData; } - /** Example of listing table rows with schema. */ + /** + * Example of listing table rows with schema. + */ // [TARGET listTableData(String, String, Schema, TableDataListOption...)] // [VARIABLE "my_dataset_name"] // [VARIABLE "my_table_name"] @@ -488,7 +524,9 @@ public TableResult listTableDataSchema( return tableData; } - /** Example of listing table rows with schema. */ + /** + * Example of listing table rows with schema. 
+ */ // [TARGET listTableData(TableId, Schema, TableDataListOption...)] public FieldValueList listTableDataSchemaId() { // [START listTableDataSchemaId] @@ -607,8 +645,10 @@ public boolean cancelJobFromId(String jobName) { return success; } - /** Example of running a query. */ - // [TARGET query(QueryJobConfiguration, QueryOption...)] + /** + * Example of running a query. + */ + // [TARGET query(QueryJobConfiguration, JobOption...)] // [VARIABLE "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"] public void runQuery(String query) throws InterruptedException { // [START runQuery] @@ -620,10 +660,11 @@ public void runQuery(String query) throws InterruptedException { // [END runQuery] } - /** Example of running a query with query parameters. */ - // [TARGET query(QueryJobConfiguration, QueryOption...)] - // [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where - // word_count > @wordCount"] + /** + * Example of running a query with query parameters. + */ + // [TARGET query(QueryJobConfiguration, JobOption...)] + // [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount"] public void runQueryWithParameters(String query) throws InterruptedException { // [START runQueryWithParameters] // Note, standard SQL is required to use query parameters. Legacy SQL will not work. 
diff --git a/google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java b/google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java index d5c081f8368f..2ef79ed95a83 100644 --- a/google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java +++ b/google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import com.google.api.gax.paging.Page; @@ -48,6 +49,7 @@ import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Iterator; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -197,6 +199,24 @@ public void testWriteAndListTableData() assertTrue(bigquerySnippets.deleteTable(DATASET, tableName)); } + @Test + public void testWriteRemoteJsonToTable() throws InterruptedException { + String datasetName = "test_dataset"; + String tableName = "us_states"; + Table table = bigquery.getTable(datasetName, tableName); + assertNull(table); + + Long result = bigquerySnippets.writeRemoteFileToTable(datasetName, tableName); + table = bigquery.getTable(datasetName, tableName); + assertNotNull(table); + ArrayList tableFieldNames = new ArrayList<>(); + for (Field field: table.getDefinition().getSchema().getFields()) { + tableFieldNames.add(field.getName()); + } + bigquery.delete(table.getTableId()); + assertEquals(Long.valueOf(50), result); + } + @Test public void testInsertAllAndListTableData() throws IOException, InterruptedException { String tableName = "test_insert_all_and_list_table_data";