From cb57b8ffbb8e6b56c1abd213ac452730ebae584d Mon Sep 17 00:00:00 2001 From: Jerjou Cheng Date: Thu, 15 Oct 2015 14:41:02 -0700 Subject: [PATCH] Update sample to use native sdk functionality. --- bigquery/api/load_data_by_post.py | 110 +++++++++++------------------- 1 file changed, 39 insertions(+), 71 deletions(-) diff --git a/bigquery/api/load_data_by_post.py b/bigquery/api/load_data_by_post.py index 5527a0551764..102c772d37ba 100755 --- a/bigquery/api/load_data_by_post.py +++ b/bigquery/api/load_data_by_post.py @@ -17,7 +17,7 @@ This sample is used on this page: - https://cloud.google.com/bigquery/loading-data-post-request + https://cloud.google.com/bigquery/loading-data-into-bigquery For more information, see the README.md under /bigquery. """ @@ -26,74 +26,60 @@ import json import time +from apiclient.http import MediaFileUpload + from googleapiclient import discovery -import httplib2 + from oauth2client.client import GoogleCredentials # [START make_post] -def make_post(http, schema, data, project_id, dataset_id, table_id): +def load_data(schema_path, data_path, project_id, dataset_id, table_id): """ Creates an http POST request for loading data into a bigquery table Args: - http: an authorized httplib2 client, - schema: a valid bigquery schema, - see https://cloud.google.com/bigquery/docs/reference/v2/tables, - data: valid JSON to insert into the table + schema_path: the path to a file containing a valid bigquery schema. + see https://cloud.google.com/bigquery/docs/reference/v2/tables + data_path: the name of the file to insert into the table. Returns: an http.request object """ - url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' + - project_id + '/jobs') - # Create the body of the request, separated by a boundary of xxx - resource = ('--xxx\n' + - 'Content-Type: application/json; charset=UTF-8\n' + '\n' + - '{\n' + - ' "configuration": {\n' + - ' "load": {\n' + - ' "schema": {\n' - ' "fields": ' + str(schema) + '\n' + - ' },\n' + - ' "destinationTable": {\n' + - ' "projectId": "' + project_id + '",\n' + - ' "datasetId": "' + dataset_id + '",\n' + - ' "tableId": "' + table_id + '"\n' + - ' }\n' + - ' }\n' + - ' }\n' + - '}\n' + - '--xxx\n' + - 'Content-Type: application/octet-stream\n' + - '\n') - # Append data to the request body - resource += data - - # Signify the end of the body - resource += ('--xxx--\n') - - headers = {'Content-Type': 'multipart/related; boundary=xxx'} - - return http.request(url, - method='POST', - body=resource, - headers=headers) - # [END make_post] - - -# [START poll_job] -def poll_job(bigquery, job): - """Waits for a job to complete.""" + # Create a bigquery service object, using the application's default auth + credentials = GoogleCredentials.get_application_default() + bigquery = discovery.build('bigquery', 'v2', credentials=credentials) + + insert_request = bigquery.jobs().insert( + projectId=project_id, + body={ + "configuration": { + "load": { + "schema": { + "fields": json.load(open(schema_path, 'r')) + }, + "destinationTable": { + "projectId": project_id, + "datasetId": dataset_id, + "tableId": table_id + } + } + } + }, + media_body=MediaFileUpload( + data_path, + mimetype="application/octet-stream")) + + job = insert_request.execute() print('Waiting for job to finish...') - request = bigquery.jobs().get( + status_request = bigquery.jobs().get( projectId=job['jobReference']['projectId'], jobId=job['jobReference']['jobId']) while True: - result = request.execute(num_retries=2) + result = status_request.execute(num_retries=2) if result['status']['state'] == 'DONE': if 'errorResult' in result['status']: @@ -102,35 +88,17 @@ def poll_job(bigquery, job): return time.sleep(1) -# [END poll_job] +# [END make_post] # [START main] def main(project_id, dataset_id, table_name, schema_path, data_path): - credentials = GoogleCredentials.get_application_default() - http = credentials.authorize(httplib2.Http()) - bigquery = discovery.build('bigquery', 'v2', credentials=credentials) - - with open(schema_path, 'r') as schema_file: - schema = schema_file.read() - - with open(data_path, 'r') as data_file: - data = data_file.read() - - resp, content = make_post( - http, - schema, - data, + load_data( + schema_path, + data_path, project_id, dataset_id, table_name) - - if resp.status == 200: - job = json.loads(content) - poll_job(bigquery, job) - print("Success!") - else: - print("Http error code: {}".format(resp.status)) # [END main] if __name__ == '__main__':