Added a snippet to show how to read a newline-delimited JSON file and store it in a table #2974

Merged (8 commits) on Mar 12, 2018
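This PR documents a BigQuery load job over a newline-delimited JSON file in GCS. Before the diff itself, here is a self-contained sketch of the flow the new snippet exercises (dataset and table names are placeholders; the client setup via BigQueryOptions.getDefaultInstance() assumes application-default credentials and is not part of the diff):

import com.google.cloud.bigquery.*;

public class LoadJsonSketch {
  public static void main(String[] args) throws InterruptedException {
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    TableId tableId = TableId.of("my_dataset_name", "my_table_name");
    // Table schema definition, matching the two textual fields in the sample file
    Schema schema = Schema.of(
        Field.of("name", LegacySQLTypeName.STRING),
        Field.of("post_abbr", LegacySQLTypeName.STRING));
    LoadJobConfiguration configuration =
        LoadJobConfiguration.builder(
                tableId, "gs://cloud-samples-data/bigquery/us-states/us-states.json")
            .setFormatOptions(FormatOptions.json())
            .setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
            .setSchema(schema)
            .build();
    // Start the load job and block until it finishes
    Job job = bigquery.create(JobInfo.of(configuration)).waitFor();
    System.out.println("State: " + job.getStatus().getState());
  }
}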
@@ -522,7 +522,7 @@ public int hashCode() {
* } catch (BigQueryException e) {
* // the dataset was not created
* }
* } </pre>
* }</pre>
*
* @throws BigQueryException upon failure
*/
@@ -538,7 +538,7 @@ public int hashCode() {
* String fieldName = "string_field";
* TableId tableId = TableId.of(datasetName, tableName);
* // Table field definition
* Field field = Field.of(fieldName, Field.Type.string());
* Field field = Field.of(fieldName, LegacySQLTypeName.STRING);
* // Table schema definition
* Schema schema = Schema.of(field);
* TableDefinition tableDefinition = StandardTableDefinition.of(schema);
@@ -553,6 +553,32 @@ public int hashCode() {
/**
* Creates a new job.
*
* <p>Example of loading a newline-delimited JSON file with textual fields from GCS to a table.
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
* TableId tableId = TableId.of(datasetName, tableName);
* // Table field definition
* Field[] fields = new Field[] {
* Field.of("name", LegacySQLTypeName.STRING),
* Field.of("post_abbr", LegacySQLTypeName.STRING)
* };
* // Table schema definition
* Schema schema = Schema.of(fields);
* LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
* .setFormatOptions(FormatOptions.json())
* .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
* .setSchema(schema)
* .build();
* // Load the table
* Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
* remoteLoadJob = remoteLoadJob.waitFor();
* // Check the table
* System.out.println("State: " + remoteLoadJob.getStatus().getState());
* return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
* }</pre>
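 *
 * <p>A follow-up sketch, not part of the snippet itself: a load job can reach the DONE state
 * and still carry an error, so real code should also inspect the job's error status (and the
 * {@code null} that {@code waitFor()} returns when the job no longer exists).
 * <pre> {@code
 * if (remoteLoadJob == null) {
 *   // the job no longer exists
 * } else if (remoteLoadJob.getStatus().getError() != null) {
 *   // the job completed, but the load failed
 * }
 * }</pre>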
*
* <p>Example of creating a query job.
* <pre> {@code
* String query = "SELECT field FROM my_dataset_name.my_table_name";
@@ -861,8 +887,7 @@ public int hashCode() {
* Lists the table's rows.
*
* <p>Example of listing table rows, specifying the page size.
*
* <pre>{@code
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
@@ -882,16 +907,15 @@ public int hashCode() {
* Lists the table's rows.
*
* <p>Example of listing table rows, specifying the page size.
*
* <pre>{@code
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* TableId tableIdObject = TableId.of(datasetName, tableName);
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
* // simply omit the option.
* TableResult tableData =
* bigquery.listTableData(tableIdObject, TableDataListOption.pageSize(100));
* for (FieldValueList row : rowIterator.hasNext()) {
* for (FieldValueList row : tableData.iterateAll()) {
* // do something with the row
* }
* }</pre>
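 *
 * <p>A paging sketch (an assumption on top of the example: {@code tableData} is the
 * {@code TableResult} obtained above, and {@code TableResult} implements the generic
 * {@code Page} interface, so the rows can also be consumed one page at a time):
 * <pre> {@code
 * Page<FieldValueList> page = tableData;
 * while (page != null) {
 *   for (FieldValueList row : page.getValues()) {
 *     // do something with the row
 *   }
 *   page = page.hasNextPage() ? page.getNextPage() : null;
 * }
 * }</pre>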
@@ -904,17 +928,16 @@ public int hashCode() {
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
* {@link FieldValueList} iterated over.
*
* <p>Example of listing table rows.
*
* <pre>{@code
* <p>Example of listing table rows with schema.
* <pre> {@code
* String datasetName = "my_dataset_name";
* String tableName = "my_table_name";
* Schema schema = ...;
* String field = "my_field";
* String field = "field";
* TableResult tableData =
* bigquery.listTableData(datasetName, tableName, schema);
* for (FieldValueList row : tableData.iterateAll()) {
* row.get(field)
* row.get(field);
* }
* }</pre>
*
@@ -927,9 +950,8 @@ TableResult listTableData(
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
* {@link FieldValueList} iterated over.
*
* <p>Example of listing table rows.
*
* <pre>{@code
* <p>Example of listing table rows with schema.
* <pre> {@code
* Schema schema =
* Schema.of(
* Field.of("word", LegacySQLTypeName.STRING),
@@ -1047,28 +1069,21 @@ TableResult listTableData(
* queries. Since dry-run queries are not actually executed, there's no way to retrieve results.
*
* <p>Example of running a query.
*
* <pre>{@code
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare`";
* QueryJobConfiguration queryConfig = QueryJobConfiguration.of(query);
*
* // To run the legacy syntax queries use the following code instead:
* // String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"
* // QueryJobConfiguration queryConfig =
* // QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
*
* <pre> {@code
* String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]";
* QueryJobConfiguration queryConfig =
* QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
* // do something with the data
* }
* }</pre>
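 *
 * <p>A minimal dry-run sketch (assumes {@code setDryRun(true)} on the builder; a dry run
 * validates the query and reports statistics such as bytes processed without executing it,
 * so there are no rows to iterate):
 * <pre> {@code
 * QueryJobConfiguration dryRunConfig =
 *     QueryJobConfiguration.newBuilder(query).setDryRun(true).build();
 * Job dryRunJob = bigquery.create(JobInfo.of(dryRunConfig));
 * // inspect dryRunJob.getStatistics() for the estimate
 * }</pre>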
*
* <p>Example of running a query with query parameters.
*
* <pre>{@code
* String query =
* "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > ?";
* <pre> {@code
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount";
* // Note, standard SQL is required to use query parameters. Legacy SQL will not work.
* QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
* .addPositionalParameter(QueryParameterValue.int64(5))
* .addNamedParameter("wordCount", QueryParameterValue.int64(5))
* .build();
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
* // do something with the data
@@ -1092,18 +1107,6 @@ TableResult query(QueryJobConfiguration configuration, JobOption... options)
* <p>See {@link #query(QueryJobConfiguration, JobOption...)} for examples on populating a {@link
* QueryJobConfiguration}.
*
* <p>The recommended way to create a randomly generated JobId is the following:
*
* <pre>{@code
* JobId jobId = JobId.of();
* }</pre>
*
* For a user specified job id with an optional prefix use the following:
*
* <pre>{@code
* JobId jobId = JobId.of("my_prefix-my_unique_job_id");
* }</pre>
*
* @throws BigQueryException upon failure
* @throws InterruptedException if the current thread gets interrupted while waiting for the query
* to complete
@@ -25,6 +25,8 @@
import com.google.api.client.util.Charsets;
import com.google.api.gax.paging.Page;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.JobInfo.CreateDisposition;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
import com.google.cloud.bigquery.BigQuery.DatasetListOption;
@@ -378,6 +380,38 @@ public long writeFileToTable(String datasetName, String tableName, Path csvPath)
// [END writeFileToTable]
}

/**
* Example of loading a newline-delimited JSON file with textual fields from GCS to a table.
*/
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
public Long writeRemoteFileToTable(String datasetName, String tableName)
throws InterruptedException {
// [START bigquery_load_table_gcs_json]
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
TableId tableId = TableId.of(datasetName, tableName);
// Table field definition
Field[] fields = new Field[] {
Field.of("name", LegacySQLTypeName.STRING),
Field.of("post_abbr", LegacySQLTypeName.STRING)
};
// Table schema definition
Schema schema = Schema.of(fields);
LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
.setFormatOptions(FormatOptions.json())
.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
.setSchema(schema)
.build();
// Load the table
Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
remoteLoadJob = remoteLoadJob.waitFor();
// Check the table
System.out.println("State: " + remoteLoadJob.getStatus().getState());
return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
// [END bigquery_load_table_gcs_json]
}

/**
* Example of inserting rows into a table without running a load job.
*/
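The method body is folded out of the diff here. For contrast with the load-job path above, a minimal streaming-insert sketch (table name and row values are placeholders; assumes java.util.Map/HashMap imports and the same bigquery client):

TableId tableId = TableId.of("my_dataset_name", "my_table_name");
Map<String, Object> rowContent = new HashMap<>();
rowContent.put("name", "Alaska");
rowContent.put("post_abbr", "AK");
InsertAllResponse response =
    bigquery.insertAll(InsertAllRequest.newBuilder(tableId).addRow(rowContent).build());
if (response.hasErrors()) {
  // inspect response.getInsertErrors() for per-row failures
}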
@@ -470,7 +504,9 @@ public TableResult listTableDataFromId(String datasetName, String tableName) {
return tableData;
}

/** Example of listing table rows with schema. */
/**
* Example of listing table rows with schema.
*/
// [TARGET listTableData(String, String, Schema, TableDataListOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
@@ -488,7 +524,9 @@ public TableResult listTableDataSchema(
return tableData;
}

/** Example of listing table rows with schema. */
/**
* Example of listing table rows with schema.
*/
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
public FieldValueList listTableDataSchemaId() {
// [START listTableDataSchemaId]
@@ -607,8 +645,10 @@ public boolean cancelJobFromId(String jobName) {
return success;
}

/** Example of running a query. */
// [TARGET query(QueryJobConfiguration, QueryOption...)]
/**
* Example of running a query.
*/
// [TARGET query(QueryJobConfiguration, JobOption...)]
// [VARIABLE "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"]
public void runQuery(String query) throws InterruptedException {
// [START runQuery]
@@ -620,10 +660,11 @@ public void runQuery(String query) throws InterruptedException {
// [END runQuery]
}
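The folded region inside runQuery holds the snippet body; a sketch of what it plausibly contains, mirroring the Javadoc example for query shown earlier (legacy SQL, hence setUseLegacySql(true)):

QueryJobConfiguration queryConfig =
    QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
  // do something with the data
}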

/** Example of running a query with query parameters. */
// [TARGET query(QueryJobConfiguration, QueryOption...)]
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where
// word_count > @wordCount"]
/**
* Example of running a query with query parameters.
*/
// [TARGET query(QueryJobConfiguration, JobOption...)]
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount"]
public void runQueryWithParameters(String query) throws InterruptedException {
// [START runQueryWithParameters]
// Note, standard SQL is required to use query parameters. Legacy SQL will not work.
@@ -20,6 +20,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import com.google.api.gax.paging.Page;
@@ -48,6 +49,7 @@
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ExecutionException;
@@ -197,6 +199,24 @@ public void testWriteAndListTableData()
assertTrue(bigquerySnippets.deleteTable(DATASET, tableName));
}

@Test
public void testWriteRemoteJsonToTable() throws InterruptedException {
String datasetName = "test_dataset";
String tableName = "us_states";
Table table = bigquery.getTable(datasetName, tableName);
assertNull(table);

Long result = bigquerySnippets.writeRemoteFileToTable(datasetName, tableName);
table = bigquery.getTable(datasetName, tableName);
assertNotNull(table);
ArrayList<String> tableFieldNames = new ArrayList<>();
for (Field field : table.getDefinition().getSchema().getFields()) {
tableFieldNames.add(field.getName());
}
// the loaded table's schema should expose the two fields defined by the snippet
assertTrue(tableFieldNames.contains("name"));
assertTrue(tableFieldNames.contains("post_abbr"));
bigquery.delete(table.getTableId());
assertEquals(Long.valueOf(50), result);
}
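One operational note: CREATE_IF_NEEDED creates a missing table but not a missing dataset, so the hard-coded test_dataset must exist before this test runs. A hedged setup sketch (the @BeforeClass hook and its placement are assumptions, not shown in this diff):

@BeforeClass
public static void ensureDataset() {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  if (bigquery.getDataset("test_dataset") == null) {
    bigquery.create(DatasetInfo.of("test_dataset"));
  }
}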

@Test
public void testInsertAllAndListTableData() throws IOException, InterruptedException {
String tableName = "test_insert_all_and_list_table_data";