Skip to content

Commit

Permalink
Merge pull request #138 from hraban/bq-export-gzip-flag
Browse files Browse the repository at this point in the history
Allow gzip-compressing the exported BigQuery CSV with the -z flag
  • Loading branch information
Jonathan Wayne Parrott committed Nov 10, 2015
2 parents 281a551 + ff11f29 commit 19d830f
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions bigquery/api/export_data_to_cloud_storage.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,8 @@
def export_table(bigquery, cloud_storage_path,
project_id, dataset_id, table_id,
export_format="CSV",
num_retries=5):
num_retries=5,
compression="NONE"):
"""
Starts an export job
Expand All @@ -47,6 +48,8 @@ def export_table(bigquery, cloud_storage_path,
e.g. gs://mybucket/myfolder/
export_format: format to export in;
"CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
compression: format to compress results with,
"NONE" (default) or "GZIP".
Returns: an extract job resource representing the
job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
Expand All @@ -66,7 +69,8 @@ def export_table(bigquery, cloud_storage_path,
'tableId': table_id,
},
'destinationUris': [cloud_storage_path],
'destinationFormat': export_format
'destinationFormat': export_format,
'compression': compression
}
}
}
Expand Down Expand Up @@ -101,7 +105,7 @@ def poll_job(bigquery, job):

# [START run]
def main(cloud_storage_path, project_id, dataset_id, table_id,
num_retries, interval, export_format="CSV"):
num_retries, interval, export_format="CSV", compression="NONE"):
# [START build_service]
# Grab the application's default credentials from the environment.
credentials = GoogleCredentials.get_application_default()
Expand All @@ -117,7 +121,8 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
dataset_id,
table_id,
num_retries=num_retries,
export_format=export_format)
export_format=export_format,
compression=compression)
poll_job(bigquery, job)
# [END run]

Expand All @@ -144,6 +149,11 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
help='Number of times to retry in case of 500 error.',
type=int,
default=5)
parser.add_argument(
'-z', '--gzip',
help='compress resultset with gzip',
action='store_true',
default=False)

args = parser.parse_args()

Expand All @@ -153,5 +163,6 @@ def main(cloud_storage_path, project_id, dataset_id, table_id,
args.dataset_id,
args.table_id,
args.num_retries,
args.poll_interval)
args.poll_interval,
compression="GZIP" if args.gzip else "NONE")
# [END main]

0 comments on commit 19d830f

Please sign in to comment.