-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Export data from BigQuery samples #186
Changes from 9 commits
bbe29c7
3d35fac
bbd5ee8
29df1db
f2b5d96
05a1de0
8f333df
5b510a7
1d76a8b
bd34778
7ec5b5a
8173529
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// Copyright 2016, Google, Inc. | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
'use strict'; | ||
|
||
var uuid = require('node-uuid'); | ||
/**
 * Build a unique resource name for this test run: the
 * 'nodejs_docs_samples_' prefix followed by a v4 UUID in which every dash
 * has been replaced with an underscore.
 *
 * @returns {string} The generated name.
 */
var generateUuid = function () {
  var suffix = uuid.v4().split('-').join('_');
  return 'nodejs_docs_samples_' + suffix;
};
var example = require('../tables'); | ||
var options = { | ||
bucket: generateUuid(), | ||
file: 'data.json', | ||
dataset: generateUuid(), | ||
table: generateUuid() | ||
}; | ||
var BigQuery = require('@google-cloud/bigquery'); | ||
var bigquery = BigQuery(); | ||
var Storage = require('@google-cloud/storage'); | ||
var storage = Storage(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should these be compressed, i.e There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fine either way. |
||
var file = storage.bucket(options.bucket).file(options.file); | ||
|
||
describe('bigquery:tables', function () { | ||
before(function (done) {
  // Fixtures are created one after another: bucket -> dataset -> table.
  // Each step asserts that the previous one succeeded before continuing.
  var tableConfig = { schema: 'name:string, age:integer' };

  // Last step: the table exists, so the suite may start.
  function onTableCreated (err) {
    assert.ifError(err, 'table creation succeeded');
    done();
  }

  // Second step: create the table inside the freshly created dataset.
  function onDatasetCreated (err, dataset) {
    assert.ifError(err, 'dataset creation succeeded');
    dataset.createTable(options.table, tableConfig, onTableCreated);
  }

  // First step: once the bucket exists, create the dataset.
  function onBucketCreated (err) {
    assert.ifError(err, 'bucket creation succeeded');
    bigquery.createDataset(options.dataset, onDatasetCreated);
  }

  storage.createBucket(options.bucket, onBucketCreated);
});
after(function (done) {
  // Tear down everything `before` created: the bucket (and the exported file
  // inside it) as well as the dataset/table.
  var bucket = storage.bucket(options.bucket);

  // Removing the files is best-effort: the export may never have been written
  // if the test failed early. Any file that could not be removed makes the
  // bucket deletion below fail, and that failure is reported.
  bucket.deleteFiles({ force: true }, function () {
    // Delete the (now empty) testing bucket
    bucket.delete(function (bucketErr) {
      // Delete testing dataset/table even if the bucket could not be removed,
      // then report the first error that occurred (if any).
      bigquery.dataset(options.dataset).delete({ force: true }, function (datasetErr) {
        done(bucketErr || datasetErr);
      });
    });
  });
});
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test isn't deleting the bucket that it creates. The simplest way to clean up a bucket and all its files can be found here: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/storage/system-test/files.test.js#L41 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
|
||
describe('export_table_to_gcs', function () {
  it('should export data to GCS', function (done) {
    // Final check: the export destination must exist in the bucket.
    function verifyExportFile (err, exists) {
      assert.ifError(err, 'file existence check succeeded');
      assert(exists, 'export destination exists');
      done();
    }

    // First check: the sample reported a finished job, then look for the file.
    function verifyJob (err, metadata) {
      assert.ifError(err, 'no error occurred');
      assert(metadata, 'job metadata was received');
      assert(metadata.status, 'job metadata has status');
      assert.equal(metadata.status.state, 'DONE', 'job was finished');

      file.exists(verifyExportFile);
    }

    example.exportTableToGCS(options, verifyJob);
  });
});
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Copyright 2016, Google, Inc. | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// [START complete] | ||
/** | ||
* Command-line application to export a table from BigQuery to Google Cloud Storage. | ||
* | ||
* This sample is used on this page: | ||
* | ||
* https://cloud.google.com/bigquery/exporting-data-from-bigquery | ||
* For more information, see the README.md under /bigquery. | ||
*/ | ||
|
||
'use strict'; | ||
|
||
// [START auth] | ||
// By default, gcloud will authenticate using the service account file specified | ||
// by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use the | ||
// project specified by the GCLOUD_PROJECT environment variable. See | ||
// https://googlecloudplatform.github.io/gcloud-node/#/docs/guides/authentication | ||
var BigQuery = require('@google-cloud/bigquery'); | ||
var Storage = require('@google-cloud/storage'); | ||
|
||
// Instantiate the BigQuery and Storage clients | ||
var bigquery = BigQuery(); | ||
var storage = Storage(); | ||
// [END auth] | ||
|
||
// [START export_table_to_gcs] | ||
/**
 * Export a table from BigQuery to Google Cloud Storage.
 *
 * @param {object} options Configuration options.
 * @param {string} options.bucket The Google Cloud Storage bucket to export to.
 * @param {string} options.file The name of the file to write within the bucket.
 * @param {string} options.dataset The ID of the dataset that contains the table.
 * @param {string} options.table The ID of the table to export.
 * @param {string} options.format Format to export as - either 'CSV', 'JSON', or 'AVRO'.
 * @param {boolean} [options.gzip] Optional. Whether or not data should be compressed using GZIP.
 * @param {function} callback Called with an error if submitting or running the
 *     export job fails; otherwise called with `null` and the value emitted by
 *     the job's 'complete' event.
 */
function exportTableToGCS (options, callback) {
  var gcsFileObj = storage.bucket(options.bucket).file(options.file);
  var table = bigquery.dataset(options.dataset).table(options.table);
  var config = {
    format: options.format,
    gzip: options.gzip
  };

  // Export table
  // See https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/bigquery/table?method=export
  table.export(gcsFileObj, config, function (err, job) {
    if (err) {
      return callback(err);
    }
    console.log('ExportTableToGCS: submitted job %s!', job.id);

    // The job is only submitted at this point; its outcome arrives via events.
    job.on('error', function (err) {
      return callback(err);
    });
    job.on('complete', function (metadata) {
      return callback(null, metadata);
    });
  });
}
// [END export_table_to_gcs] | ||
// [END complete] | ||
|
||
// The command-line program | ||
var cli = require('yargs'); | ||
|
||
// Public interface of this module: the sample function itself plus a `main`
// entry point that feeds the given arguments to the yargs command-line program.
var program = {
  exportTableToGCS: exportTableToGCS,
  main: function (args) {
    // Run the command-line program
    var parser = cli.help().strict();
    parser.parse(args).argv;
  }
};
module.exports = program;
|
||
// Flags accepted by the `export` command. `--format` (alias `-f`) must be one
// of the listed choices and falls back to 'JSON'; `--gzip` is a boolean switch.
var exportCommandOptions = {
  format: {
    alias: 'f',
    global: true,
    requiresArg: true,
    type: 'string',
    choices: ['JSON', 'CSV', 'AVRO'],
    default: 'JSON'
  },
  gzip: {
    global: true,
    type: 'boolean',
    description: 'Whether to compress the exported table using gzip. Defaults to false.'
  }
};

// Handler for the `export` command. `program.exportTableToGCS` is looked up at
// call time, and the outcome is printed with console.log.
function runExportCommand (options) {
  program.exportTableToGCS(options, console.log);
}

cli
  .command(
    'export <bucket> <file> <dataset> <table>',
    'Export a table from BigQuery to Google Cloud Storage.',
    exportCommandOptions,
    runExportCommand
  )
  .example(
    'node $0 export sample-bigquery-export data.json github_samples natality',
    'Export github_samples:natality to gcs://sample-bigquery-export/data.json as raw JSON'
  )
  .example(
    'node $0 export sample-bigquery-export data.csv github_samples natality -f CSV --gzip',
    'Export github_samples:natality to gcs://sample-bigquery-export/data.csv as gzipped CSV'
  )
  .wrap(100)
  .recommendCommands()
  .epilogue('For more information, see https://cloud.google.com/bigquery/exporting-data-from-bigquery');
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might want to change the link to just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
|
||
if (module === require.main) { | ||
program.main(process.argv.slice(2)); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
// Copyright 2016, Google, Inc. | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
'use strict'; | ||
|
||
var proxyquire = require('proxyquire').noCallThru(); | ||
var bucket = 'bucket'; | ||
var file = 'file'; | ||
var dataset = 'dataset'; | ||
var table = 'table'; | ||
var format = 'JSON'; | ||
|
||
/**
 * Build a fresh copy of the `../tables` sample wired to sinon mocks instead
 * of the real BigQuery and Storage clients.
 *
 * @returns {{program: object, mocks: object}} The proxyquired sample module
 *     together with every mock it was wired to, so tests can inspect calls
 *     and adjust stub behavior.
 */
function getSample () {
  // Declared first: `bucketMock.file` must hand out this object. Declaring it
  // after `bucketMock` made the stub return `undefined`, because the hoisted
  // `var` had not been assigned yet when `returns()` was called.
  var fileMock = {};
  var bucketMock = {
    file: sinon.stub().returns(fileMock)
  };
  var storageMock = {
    bucket: sinon.stub().returns(bucketMock)
  };
  var metadataMock = { status: { state: 'DONE' } };
  var jobMock = {
    getMetadata: sinon.stub().callsArgWith(0, null, metadataMock),
    on: sinon.stub()
  };
  var tableMock = {
    // Invokes the callback passed as third argument with the mocked job.
    export: sinon.stub().callsArgWith(2, null, jobMock)
  };
  var datasetMock = {
    table: sinon.stub().returns(tableMock)
  };
  var bigqueryMock = {
    job: sinon.stub().returns(jobMock),
    dataset: sinon.stub().returns(datasetMock)
  };
  var BigQueryMock = sinon.stub().returns(bigqueryMock);
  var StorageMock = sinon.stub().returns(storageMock);

  return {
    program: proxyquire('../tables', {
      '@google-cloud/bigquery': BigQueryMock,
      '@google-cloud/storage': StorageMock,
      yargs: proxyquire('yargs', {})
    }),
    mocks: {
      BigQuery: BigQueryMock,
      bigquery: bigqueryMock,
      Storage: StorageMock,
      storage: storageMock,
      metadata: metadataMock,
      job: jobMock,
      table: tableMock,
      bucket: bucketMock,
      file: fileMock,
      dataset: datasetMock
    }
  };
}
|
||
describe('bigquery:tables', function () { | ||
describe('exportTable', function () {
  // Shared expectation for both failure paths: the callback fires exactly once
  // and receives nothing but the given error.
  function assertCallbackGotOnlyError (callback, error) {
    assert(callback.calledOnce, 'callback called once');
    assert.equal(callback.firstCall.args.length, 1, 'callback received 1 argument');
    assert.equal(callback.firstCall.args[0], error, 'callback received error');
  }

  it('should export to a table', function () {
    var example = getSample();
    var mocks = example.mocks;
    var callback = sinon.stub();
    var options = {
      bucket: bucket,
      file: file,
      dataset: dataset,
      table: table,
      format: format,
      gzip: true
    };
    // Make the mocked job report completion as soon as a listener is attached.
    mocks.job.on.withArgs('complete').callsArgWith(1, mocks.metadata);

    example.program.exportTableToGCS(options, callback);

    assert(mocks.storage.bucket.calledWith(options.bucket), 'bucket found');
    assert(mocks.bucket.file.calledWith(options.file), 'file found');
    assert(mocks.bigquery.dataset.calledWith(options.dataset), 'dataset found');
    assert(mocks.dataset.table.calledWith(options.table), 'table found');
    assert(mocks.table.export.calledOnce, 'table.export called once');
    assert(
      console.log.calledWith('ExportTableToGCS: submitted job %s!', mocks.job.id),
      'job submittal was reported'
    );

    assert(callback.calledOnce, 'callback called once');
    var received = callback.firstCall.args;
    assert.equal(received.length, 2, 'callback received 2 arguments');
    assert.ifError(received[0], 'callback did not receive error');
    assert.equal(received[1], mocks.metadata, 'callback received metadata');
  });

  it('should handle export error', function () {
    var error = new Error('exportTableToGCSError');
    var example = getSample();
    var callback = sinon.stub();
    // Fail while submitting the export job.
    example.mocks.table.export = sinon.stub().callsArgWith(2, error);

    example.program.exportTableToGCS({ format: format }, callback);

    assertCallbackGotOnlyError(callback, error);
  });

  it('should handle job-processing error', function () {
    var error = new Error('exportTableToGCSError');
    var example = getSample();
    var callback = sinon.stub();
    // Submission succeeds, but the job then emits an error.
    example.mocks.job.on.withArgs('error').callsArgWith(1, error);

    example.program.exportTableToGCS({ format: format }, callback);

    assertCallbackGotOnlyError(callback, error);
  });
});
|
||
describe('main', function () {
  // Runs the CLI's `export` command against a fresh sample whose
  // exportTableToGCS has been replaced by a stub, and returns that stub.
  function runExport (extraArgs) {
    var program = getSample().program;
    program.exportTableToGCS = sinon.stub();

    program.main(['export', bucket, file, dataset, table].concat(extraArgs));
    return program.exportTableToGCS;
  }

  it('should call exportTableToGCS', function () {
    var exportStub = runExport([]);
    assert(exportStub.calledOnce, 'exportTableToGCS called');
  });

  it('should recognize --gzip flag', function () {
    var exportStub = runExport(['--gzip']);
    assert(exportStub.calledOnce, 'exportTableToGCS called once');

    var received = exportStub.firstCall.args;
    assert.equal(received.length, 2, 'exportTableToGCS received 2 arguments');
    assert(received[0], 'exportTableToGCS received options');
    assert(received[0].gzip, 'exportTableToGCS received gzip as True');
  });

  it('should recognize --format flag', function () {
    var exportStub = runExport(['--format', 'CSV']);
    assert(exportStub.calledOnce, 'exportTableToGCS called once');

    var received = exportStub.firstCall.args;
    assert.equal(received.length, 2, 'exportTableToGCS received 2 arguments');
    assert(received[0], 'exportTableToGCS received options');
    assert.equal(received[0].format, 'CSV', 'exportTableToGCS received format as CSV');
  });
});
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hopefully doing this as a function (for brevity) is fine.