From fcacde01ba44f7a5e5dbe53df561509114da3b71 Mon Sep 17 00:00:00 2001
From: Joe Sapp
Date: Fri, 12 Nov 2021 16:13:55 +0000
Subject: [PATCH] Add ability to execute arbitrary commands on each downloaded file

I did it this way so each file could be compressed without hard-coding
the compression algorithm. But I could see this being used to run a
pre-processing script on each downloaded file.
---
NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Indentation and blank context lines are
inferred from Python syntax and the hunk line counts; verify them against
the target file before applying. The "index" blob id below is the one from
the original patch and does not reflect the reworked process_file body.

 subscriber/podaac_data_subscriber.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py
index baf16ae..985675a 100755
--- a/subscriber/podaac_data_subscriber.py
+++ b/subscriber/podaac_data_subscriber.py
@@ -25,6 +25,7 @@
 import os
 from os import makedirs
 from os.path import isdir, basename, join, splitext
+import subprocess
 from urllib.parse import urlencode
 from urllib.request import urlopen, urlretrieve
 from datetime import datetime, timedelta
@@ -208,6 +209,7 @@ def create_parser():
 
     parser.add_argument("-m", "--minutes", dest="minutes", help = "How far back in time, in minutes, should the script look for data. If running this script as a cron, this value should be equal to or greater than how often your cron runs (default: 60 minutes).", type=int, default=60) # noqa E501
     parser.add_argument("-e", "--extensions", dest="extensions", help = "The extensions of products to download. Default is [.nc, .h5, .zip]", default=[".nc", ".h5", ".zip"], nargs='*') # noqa E501
+    parser.add_argument("--process", dest="process_cmd", help = "Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times.", action='append') # noqa E501
     parser.add_argument("--version", dest="version", action="store_true",help="Display script version information and exit.") # noqa E501
     parser.add_argument("--verbose", dest="verbose", action="store_true",help="Verbose mode.") # noqa E501
 
@@ -244,6 +246,7 @@ def run():
 
     short_name = args.collection
     extensions = args.extensions
+    process_cmd = args.process_cmd
 
     data_path = args.outputDirectory
     # You should change `data_path` to a suitable download path on your file system.
 
@@ -480,6 +483,23 @@ def prepare_cycles_output(data_cycles, prefix, file):
         write_path = join(prefix, cycle_dir, basename(file))
         return write_path
 
+    def process_file(output_path):
+        """Run each --process command on a downloaded file.
+
+        Every command is split on whitespace and executed with
+        output_path appended as its final argument. A non-zero exit
+        status raises subprocess.CalledProcessError.
+        """
+        for cmd in process_cmd or []:
+            cmd_args = cmd.split()
+            if not cmd_args:
+                # A blank command would make subprocess execute the
+                # downloaded file itself, so reject it explicitly.
+                raise ValueError("--process command must not be empty")
+            if args.verbose:
+                print(f'Running: {cmd} {output_path}')
+            subprocess.run(cmd_args + [output_path], check=True)
+
     for f in downloads:
         try:
             for extension in extensions:
@@ -495,6 +515,7 @@ def prepare_cycles_output(data_cycles, prefix, file):
                         output_path = prepare_cycles_output(
                             cycles, data_path, f)
                     urlretrieve(f, output_path)
+                    process_file(output_path)
                     print(str(datetime.now()) + " SUCCESS: " + f)
                     success_cnt = success_cnt + 1
         except Exception as e: