Skip to content

Commit

Permalink
Added a snippet to show how to read a newline-delimited-json file and…
Browse files Browse the repository at this point in the history
… store it in a Table (#2974)
  • Loading branch information
happyhuman authored and pongad committed Mar 12, 2018
1 parent a21cc36 commit 848245e
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ public int hashCode() {
* } catch (BigQueryException e) {
* // the dataset was not created
* }
* } </pre>
* }</pre>
*
* @throws BigQueryException upon failure
*/
Expand All @@ -538,7 +538,7 @@ public int hashCode() {
* String fieldName = "string_field";
* TableId tableId = TableId.of(datasetName, tableName);
* // Table field definition
* Field field = Field.of(fieldName, Field.Type.string());
* Field field = Field.of(fieldName, LegacySQLTypeName.STRING);
* // Table schema definition
* Schema schema = Schema.of(field);
* TableDefinition tableDefinition = StandardTableDefinition.of(schema);
Expand All @@ -553,6 +553,32 @@ public int hashCode() {
/**
* Creates a new job.
*
* <p>Example of loading a newline-delimited-json file with textual fields from GCS to a table.
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
* TableId tableId = TableId.of(datasetName, tableName);
* // Table field definition
* Field[] fields = new Field[] {
* Field.of("name", LegacySQLTypeName.STRING),
* Field.of("post_abbr", LegacySQLTypeName.STRING)
* };
* // Table schema definition
* Schema schema = Schema.of(fields);
* LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
* .setFormatOptions(FormatOptions.json())
* .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
* .setSchema(schema)
* .build();
* // Load the table
* Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
* remoteLoadJob = remoteLoadJob.waitFor();
* // Check the table
* System.out.println("State: " + remoteLoadJob.getStatus().getState());
* return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
* }</pre>
*
* <p>Example of creating a query job.
* <pre> {@code
* String query = "SELECT field FROM my_dataset_name.my_table_name";
Expand Down Expand Up @@ -861,8 +887,7 @@ public int hashCode() {
* Lists the table's rows.
*
* <p>Example of listing table rows, specifying the page size.
*
* <pre>{@code
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
Expand All @@ -882,16 +907,15 @@ public int hashCode() {
* Lists the table's rows.
*
* <p>Example of listing table rows, specifying the page size.
*
* <pre>{@code
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* TableId tableIdObject = TableId.of(datasetName, tableName);
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
* // simply omit the option.
* TableResult tableData =
* bigquery.listTableData(tableIdObject, TableDataListOption.pageSize(100));
* for (FieldValueList row : rowIterator.hasNext()) {
* for (FieldValueList row : tableData.iterateAll()) {
* // do something with the row
* }
* }</pre>
Expand All @@ -904,17 +928,16 @@ public int hashCode() {
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
* {@link FieldValueList} iterated over.
*
* <p>Example of listing table rows.
*
* <pre>{@code
* <p>Example of listing table rows with schema.
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* Schema schema = ...;
* String field = "my_field";
* String field = "field";
* TableResult tableData =
* bigquery.listTableData(datasetName, tableName, schema);
* for (FieldValueList row : tableData.iterateAll()) {
* row.get(field)
* row.get(field);
* }
* }</pre>
*
Expand All @@ -927,9 +950,8 @@ TableResult listTableData(
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
* {@link FieldValueList} iterated over.
*
* <p>Example of listing table rows.
*
* <pre>{@code
* <p>Example of listing table rows with schema.
* <pre> {@code
* Schema schema =
* Schema.of(
* Field.of("word", LegacySQLTypeName.STRING),
Expand Down Expand Up @@ -1047,28 +1069,21 @@ TableResult listTableData(
* queries. Since dry-run queries are not actually executed, there's no way to retrieve results.
*
* <p>Example of running a query.
*
* <pre>{@code
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare`";
* QueryJobConfiguration queryConfig = QueryJobConfiguration.of(query);
*
* // To run the legacy syntax queries use the following code instead:
* // String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"
* // QueryJobConfiguration queryConfig =
* // QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
*
* <pre> {@code
* String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]";
* QueryJobConfiguration queryConfig =
* QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
* // do something with the data
* }
* }</pre>
*
* <p>Example of running a query with query parameters.
*
* <pre>{@code
* String query =
* "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > ?";
* <pre> {@code
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount";
* // Note, standard SQL is required to use query parameters. Legacy SQL will not work.
* QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
* .addPositionalParameter(QueryParameterValue.int64(5))
* .addNamedParameter("wordCount", QueryParameterValue.int64(5))
* .build();
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
* // do something with the data
Expand All @@ -1092,18 +1107,6 @@ TableResult query(QueryJobConfiguration configuration, JobOption... options)
* <p>See {@link #query(QueryJobConfiguration, JobOption...)} for examples on populating a {@link
* QueryJobConfiguration}.
*
* <p>The recommended way to create a randomly generated JobId is the following:
*
* <pre>{@code
* JobId jobId = JobId.of();
* }</pre>
*
* For a user specified job id with an optional prefix use the following:
*
* <pre>{@code
* JobId jobId = JobId.of("my_prefix-my_unique_job_id");
* }</pre>
*
* @throws BigQueryException upon failure
* @throws InterruptedException if the current thread gets interrupted while waiting for the query
* to complete
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import com.google.api.client.util.Charsets;
import com.google.api.gax.paging.Page;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.JobInfo.CreateDisposition;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
import com.google.cloud.bigquery.BigQuery.DatasetListOption;
Expand Down Expand Up @@ -378,6 +380,38 @@ public long writeFileToTable(String datasetName, String tableName, Path csvPath)
// [END writeFileToTable]
}

/**
* Example of loading a newline-delimited-json file with textual fields from GCS to a table.
*/
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
public Long writeRemoteFileToTable(String datasetName, String tableName)
throws InterruptedException {
// [START bigquery_load_table_gcs_json]
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
TableId tableId = TableId.of(datasetName, tableName);
// Table field definition
Field[] fields = new Field[] {
Field.of("name", LegacySQLTypeName.STRING),
Field.of("post_abbr", LegacySQLTypeName.STRING)
};
// Table schema definition
Schema schema = Schema.of(fields);
LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
.setFormatOptions(FormatOptions.json())
.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
.setSchema(schema)
.build();
// Load the table
Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
remoteLoadJob = remoteLoadJob.waitFor();
// Check the table
System.out.println("State: " + remoteLoadJob.getStatus().getState());
return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
// [END bigquery_load_table_gcs_json]
}

/**
* Example of inserting rows into a table without running a load job.
*/
Expand Down Expand Up @@ -470,7 +504,9 @@ public TableResult listTableDataFromId(String datasetName, String tableName) {
return tableData;
}

/** Example of listing table rows with schema. */
/**
* Example of listing table rows with schema.
*/
// [TARGET listTableData(String, String, Schema, TableDataListOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
Expand All @@ -488,7 +524,9 @@ public TableResult listTableDataSchema(
return tableData;
}

/** Example of listing table rows with schema. */
/**
* Example of listing table rows with schema.
*/
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
public FieldValueList listTableDataSchemaId() {
// [START listTableDataSchemaId]
Expand Down Expand Up @@ -607,8 +645,10 @@ public boolean cancelJobFromId(String jobName) {
return success;
}

/** Example of running a query. */
// [TARGET query(QueryJobConfiguration, QueryOption...)]
/**
* Example of running a query.
*/
// [TARGET query(QueryJobConfiguration, JobOption...)]
// [VARIABLE "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"]
public void runQuery(String query) throws InterruptedException {
// [START runQuery]
Expand All @@ -620,10 +660,11 @@ public void runQuery(String query) throws InterruptedException {
// [END runQuery]
}

/** Example of running a query with query parameters. */
// [TARGET query(QueryJobConfiguration, QueryOption...)]
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where
// word_count > @wordCount"]
/**
* Example of running a query with query parameters.
*/
// [TARGET query(QueryJobConfiguration, JobOption...)]
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount"]
public void runQueryWithParameters(String query) throws InterruptedException {
// [START runQueryWithParameters]
// Note, standard SQL is required to use query parameters. Legacy SQL will not work.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import com.google.api.gax.paging.Page;
Expand Down Expand Up @@ -48,6 +49,7 @@
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ExecutionException;
Expand Down Expand Up @@ -197,6 +199,24 @@ public void testWriteAndListTableData()
assertTrue(bigquerySnippets.deleteTable(DATASET, tableName));
}

@Test
public void testWriteRemoteJsonToTable() throws InterruptedException {
String datasetName = "test_dataset";
String tableName = "us_states";
Table table = bigquery.getTable(datasetName, tableName);
assertNull(table);

Long result = bigquerySnippets.writeRemoteFileToTable(datasetName, tableName);
table = bigquery.getTable(datasetName, tableName);
assertNotNull(table);
ArrayList<String> tableFieldNames = new ArrayList<>();
for (Field field: table.getDefinition().getSchema().getFields()) {
tableFieldNames.add(field.getName());
}
bigquery.delete(table.getTableId());
assertEquals(Long.valueOf(50), result);
}

@Test
public void testInsertAllAndListTableData() throws IOException, InterruptedException {
String tableName = "test_insert_all_and_list_table_data";
Expand Down

0 comments on commit 848245e

Please sign in to comment.