Export data from BigQuery samples #186

Merged (12 commits) on Aug 24, 2016
Changes from 9 commits
4 changes: 3 additions & 1 deletion bigquery/package.json
@@ -11,8 +11,10 @@
"dependencies": {
"@google-cloud/bigquery": "^0.1.1",
"@google-cloud/resource": "^0.1.1",
"@google-cloud/storage": "^0.1.1",
"async": "^2.0.1",
"request": "^2.72.0"
"request": "^2.72.0",
"yargs": "^5.0.0"
},
"devDependencies": {
"mocha": "^3.0.2"
79 changes: 79 additions & 0 deletions bigquery/system-test/tables.test.js
@@ -0,0 +1,79 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

var assert = require('assert');
var uuid = require('node-uuid');
var generateUuid = function () {
Contributor Author: Hopefully doing this as a function (for brevity) is fine.

return 'nodejs_docs_samples_' + uuid.v4().replace(/-/gi, '_');
};
var example = require('../tables');
var options = {
bucket: generateUuid(),
file: 'data.json',
dataset: generateUuid(),
table: generateUuid()
};
var BigQuery = require('@google-cloud/bigquery');
var bigquery = BigQuery();
var Storage = require('@google-cloud/storage');
var storage = Storage();
Contributor Author: Should these be compressed, i.e. var bigquery = require('@google-cloud/bigquery')();?

Member: It's fine either way.
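
(For reference, the one-liner form discussed above is equivalent; it just chains the constructor call onto the require:)

var bigquery = require('@google-cloud/bigquery')();
var storage = require('@google-cloud/storage')();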

var file = storage.bucket(options.bucket).file(options.file);

describe('bigquery:tables', function () {
before(function (done) {
// Create bucket
storage.createBucket(options.bucket, function (err, bucket) {
assert.ifError(err, 'bucket creation succeeded');

// Create dataset
bigquery.createDataset(options.dataset, function (err, dataset) {
assert.ifError(err, 'dataset creation succeeded');

// Create table
dataset.createTable(
options.table,
{ schema: 'name:string, age:integer' },
function (err, table) {
assert.ifError(err, 'table creation succeeded');
done();
}
);
});
});
});
after(function (done) {
// Delete table export
file.delete(function () {
// Delete testing dataset/table
bigquery.dataset(options.dataset).delete({ force: true }, done);
});
});
jmdobry (Member), Aug 24, 2016: This test isn't deleting the bucket that it creates. The simplest way to clean up a bucket and all its files can be found here: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/storage/system-test/files.test.js#L41

Contributor Author: Fixed.
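
(A sketch of the cleanup the reviewer suggests, assuming the bucket.deleteFiles() helper used in the linked files.test.js; errors are ignored so teardown always runs to completion:)

after(function (done) {
  // Delete the testing dataset/table, then empty and delete the bucket
  bigquery.dataset(options.dataset).delete({ force: true }, function () {
    var bucket = storage.bucket(options.bucket);
    bucket.deleteFiles({ force: true }, function () {
      bucket.delete(done);
    });
  });
});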


describe('export_table_to_gcs', function () {
it('should export data to GCS', function (done) {
example.exportTableToGCS(options, function (err, metadata) {
assert.ifError(err, 'no error occurred');
assert(metadata, 'job metadata was received');
assert(metadata.status, 'job metadata has status');
assert.equal(metadata.status.state, 'DONE', 'job was finished');

file.exists(function (err, exists) {
assert.ifError(err, 'file existence check succeeded');
assert(exists, 'export destination exists');
done();
});
});
});
});
});
124 changes: 124 additions & 0 deletions bigquery/tables.js
@@ -0,0 +1,124 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// [START complete]
/**
* Command-line application to export a table from BigQuery to Google Cloud Storage.
*
* This sample is used on this page:
*
* https://cloud.google.com/bigquery/exporting-data-from-bigquery
* For more information, see the README.md under /bigquery.
*/

'use strict';

// [START auth]
// By default, gcloud will authenticate using the service account file specified
// by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use the
// project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/guides/authentication
var BigQuery = require('@google-cloud/bigquery');
var Storage = require('@google-cloud/storage');

// Instantiate the BigQuery and Storage clients
var bigquery = BigQuery();
var storage = Storage();
// [END auth]

// [START export_table_to_gcs]
/**
* Export a table from BigQuery to Google Cloud Storage.
*
* @param {object} options Configuration options.
* @param {string} options.bucket A Google Cloud Storage bucket to use for storage.
* @param {string} options.file The file to save results to within Google Cloud Storage.
* @param {string} options.dataset The ID of the dataset to use.
* @param {string} options.table The ID of the table to use.
* @param {string} options.format Format to export as - either 'CSV', 'JSON', or 'AVRO'.
* @param {boolean} [options.gzip] Optional. Whether or not data should be compressed using GZIP.
* @param {function} callback Callback function to receive query results.
*/
function exportTableToGCS (options, callback) {
var gcsFileObj = storage.bucket(options.bucket).file(options.file);
// Export table
// See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
var table = bigquery.dataset(options.dataset).table(options.table);
table.export(
Member: This is a little hard to read, can we do:

var config = {
  format: options.format,
  gzip: options.gzip
};
table.export(gcsFileObj, config, function (err, job) {

Member: Or better yet:

var gcsFileObj = storage.bucket(options.bucket).file(options.file);
var table = bigquery.dataset(options.dataset).table(options.table);

var config = {
  format: options.format,
  gzip: options.gzip
};

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
table.export(gcsFileObj, config, function (err, job) {

Contributor Author: Fixed.

gcsFileObj,
{
format: options.format,
gzip: options.gzip
},
function (err, job) {
if (err) {
return callback(err);
}
console.log('ExportTableToGCS: submitted job %s!', job.id);

job.on('error', function (err) {
return callback(err);
});
job.on('complete', function (job) {
return callback(null, job);
});
}
);
}
// [END export_table_to_gcs]
// [END complete]

// The command-line program
var cli = require('yargs');

var program = module.exports = {
exportTableToGCS: exportTableToGCS,
main: function (args) {
// Run the command-line program
cli.help().strict().parse(args).argv;
}
};

cli
.command('export <bucket> <file> <dataset> <table>', 'Export a table from BigQuery to Google Cloud Storage.', {
format: {
Member: These aren't global options, so you should move them into the export command. Here's an example of how: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/logging/sinks.js#L168

Contributor Author: Fixed.

alias: 'f',
global: true,
requiresArg: true,
type: 'string',
choices: ['JSON', 'CSV', 'AVRO'],
default: 'JSON'
Member: I don't think you should have a default here; @google-cloud/bigquery has its own default that it chooses (which happens to be "csv").

Contributor Author: Hmm...when I tested it, the default seemed to be JSON. (Though I agree with the "@google-cloud/bigquery has its own default" part.)

},
gzip: {
global: true,
type: 'boolean',
description: 'Whether to compress the exported table using gzip. Defaults to false.'
}
}, function (options) {
program.exportTableToGCS(options, console.log);
})
.example(
'node $0 export sample-bigquery-export data.json github_samples natality',
Member: Can you change the names in these samples to be recognizable as what they are? For example, github_samples could be a bucket, dataset, table, or I don't know what; I'd have to look at the positional signature of the command to figure it out.

Better for these examples would be node $0 export my-bucket my-file.json my_dataset my_table, so it becomes obvious to the user what is what.

Contributor Author: Fixed.

'Export github_samples:natality to gs://sample-bigquery-export/data.json as raw JSON'
)
.example(
'node $0 export sample-bigquery-export data.csv github_samples natality -f CSV --gzip',
'Export github_samples:natality to gs://sample-bigquery-export/data.csv as gzipped CSV'
)
.wrap(100)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/exporting-data-from-bigquery');
Member: Might want to change the link to just https://cloud.google.com/bigquery/docs, as this file will contain other examples unrelated to exporting data from BigQuery.

Contributor Author: Fixed.


if (module === require.main) {
program.main(process.argv.slice(2));
}
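
(Circling back to the review threads above on global options and the format default, a sketch of the export command with its flags scoped to the command itself: no global: true, and no CLI-level default so @google-cloud/bigquery can apply its own:)

cli
  .command('export <bucket> <file> <dataset> <table>', 'Export a table from BigQuery to Google Cloud Storage.', {
    format: {
      alias: 'f',
      requiresArg: true,
      type: 'string',
      choices: ['JSON', 'CSV', 'AVRO']
    },
    gzip: {
      type: 'boolean',
      description: 'Whether to compress the exported table using gzip. Defaults to false.'
    }
  }, function (options) {
    program.exportTableToGCS(options, console.log);
  });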
161 changes: 161 additions & 0 deletions bigquery/test/tables.test.js
@@ -0,0 +1,161 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

var proxyquire = require('proxyquire').noCallThru();
var sinon = require('sinon');
var assert = require('assert');
// Note: the console.log assertions below assume console.log is stubbed by the
// project's shared test setup.
var bucket = 'bucket';
var file = 'file';
var dataset = 'dataset';
var table = 'table';
var format = 'JSON';

function getSample () {
// fileMock must be initialized before bucketMock's stub captures it;
// with var hoisting, the original ordering made the stub return undefined
var fileMock = {};
var bucketMock = {
file: sinon.stub().returns(fileMock)
};
var storageMock = {
bucket: sinon.stub().returns(bucketMock)
};
var metadataMock = { status: { state: 'DONE' } };
var jobMock = {
getMetadata: sinon.stub().callsArgWith(0, null, metadataMock),
on: sinon.stub()
};
var tableMock = {
export: sinon.stub().callsArgWith(2, null, jobMock)
};
var datasetMock = {
table: sinon.stub().returns(tableMock)
};
var bigqueryMock = {
Member: Collapse the extra lines between all the var declarations.

Contributor Author: Fixed.

job: sinon.stub().returns(jobMock),
dataset: sinon.stub().returns(datasetMock)
};
var BigQueryMock = sinon.stub().returns(bigqueryMock);
var StorageMock = sinon.stub().returns(storageMock);

return {
program: proxyquire('../tables', {
'@google-cloud/bigquery': BigQueryMock,
'@google-cloud/storage': StorageMock,
yargs: proxyquire('yargs', {})
}),
mocks: {
BigQuery: BigQueryMock,
bigquery: bigqueryMock,
Storage: StorageMock,
storage: storageMock,
metadata: metadataMock,
job: jobMock,
table: tableMock,
bucket: bucketMock,
dataset: datasetMock
}
};
}

describe('bigquery:tables', function () {
describe('exportTable', function () {
it('should export to a table', function () {
var example = getSample();
var options = {
bucket: bucket,
file: file,
dataset: dataset,
table: table,
format: format,
gzip: true
};
var callback = sinon.stub();
example.mocks.job.on.withArgs('complete').callsArgWith(1, example.mocks.metadata);

example.program.exportTableToGCS(options, callback);

assert(example.mocks.storage.bucket.calledWith(options.bucket), 'bucket found');
assert(example.mocks.bucket.file.calledWith(options.file), 'file found');
assert(example.mocks.bigquery.dataset.calledWith(options.dataset), 'dataset found');
assert(example.mocks.dataset.table.calledWith(options.table), 'table found');
assert(example.mocks.table.export.calledOnce, 'table.export called once');
assert(console.log.calledWith('ExportTableToGCS: submitted job %s!', example.mocks.job.id),
'job submittal was reported'
);

assert(callback.calledOnce, 'callback called once');
assert.equal(callback.firstCall.args.length, 2, 'callback received 2 arguments');
assert.ifError(callback.firstCall.args[0], 'callback did not receive error');
assert.equal(callback.firstCall.args[1], example.mocks.metadata, 'callback received metadata');
});

it('should handle export error', function () {
var error = new Error('exportTableToGCSError');
var example = getSample();
var callback = sinon.stub();
example.mocks.table.export = sinon.stub().callsArgWith(2, error);
example.program.exportTableToGCS({ format: format }, callback);

assert(callback.calledOnce, 'callback called once');
assert.equal(callback.firstCall.args.length, 1, 'callback received 1 argument');
assert.equal(callback.firstCall.args[0], error, 'callback received error');
});

it('should handle job-processing error', function () {
var error = new Error('exportTableToGCSError');
var example = getSample();
var callback = sinon.stub();
example.mocks.job.on.withArgs('error').callsArgWith(1, error);
example.program.exportTableToGCS({ format: format }, callback);

assert(callback.calledOnce, 'callback called once');
assert.equal(callback.firstCall.args.length, 1, 'callback received 1 argument');
assert.equal(callback.firstCall.args[0], error, 'callback received error');
});
});

describe('main', function () {
it('should call exportTableToGCS', function () {
var program = getSample().program;
program.exportTableToGCS = sinon.stub();

program.main(['export', bucket, file, dataset, table]);
assert(program.exportTableToGCS.calledOnce, 'exportTableToGCS called');
});

it('should recognize --gzip flag', function () {
var program = getSample().program;
program.exportTableToGCS = sinon.stub();

program.main(['export', bucket, file, dataset, table, '--gzip']);
assert(program.exportTableToGCS.calledOnce, 'exportTableToGCS called once');

var firstArgs = program.exportTableToGCS.firstCall.args;
assert.equal(firstArgs.length, 2, 'exportTableToGCS received 2 arguments');
assert(firstArgs[0], 'exportTableToGCS received options');
assert(firstArgs[0].gzip, 'exportTableToGCS received gzip as True');
});

it('should recognize --format flag', function () {
var program = getSample().program;
program.exportTableToGCS = sinon.stub();

program.main(['export', bucket, file, dataset, table, '--format', 'CSV']);
assert(program.exportTableToGCS.calledOnce, 'exportTableToGCS called once');

var firstArgs = program.exportTableToGCS.firstCall.args;
assert.equal(firstArgs.length, 2, 'exportTableToGCS received 2 arguments');
assert(firstArgs[0], 'exportTableToGCS received options');
assert.equal(firstArgs[0].format, 'CSV', 'exportTableToGCS received format as CSV');
});
});
});
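
(For completeness, a sketch of exercising the sample outside the test suite; the bucket/file/dataset/table values are hypothetical:)

var program = require('./tables');

// Through the CLI wiring, exactly as the unit tests drive program.main():
program.main(['export', 'my-bucket', 'my-file.json', 'my_dataset', 'my_table', '-f', 'CSV', '--gzip']);

// Or by calling the sample function directly:
program.exportTableToGCS({
  bucket: 'my-bucket',
  file: 'my-file.json',
  dataset: 'my_dataset',
  table: 'my_table',
  format: 'CSV',
  gzip: true
}, function (err, job) {
  if (err) {
    throw err;
  }
  console.log('Export job finished with state: %s', job.status.state);
});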