From a906b4a1e5b71434a9aa49ec24342624d457727c Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 25 Apr 2023 07:17:26 -0400 Subject: [PATCH] multiple fastq directory inputs --- .../geomxngs_fastq_to_dcc/src/delete-url.py | 21 ---- .../src/rename-fastqs.py | 31 ------ .../geomxngs_fastq_to_dcc/src/update-cpu.py | 29 ----- workflows/geomxng/geomxngs_fastq_to_dcc.wdl | 101 ++++++++++++++++-- 4 files changed, 90 insertions(+), 92 deletions(-) delete mode 100644 docker/geomxngs_fastq_to_dcc/src/delete-url.py delete mode 100755 docker/geomxngs_fastq_to_dcc/src/rename-fastqs.py delete mode 100755 docker/geomxngs_fastq_to_dcc/src/update-cpu.py diff --git a/docker/geomxngs_fastq_to_dcc/src/delete-url.py b/docker/geomxngs_fastq_to_dcc/src/delete-url.py deleted file mode 100644 index 69e082de..00000000 --- a/docker/geomxngs_fastq_to_dcc/src/delete-url.py +++ /dev/null @@ -1,21 +0,0 @@ -import argparse -from subprocess import check_call - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument("--backend", required=True) - parser.add_argument("--url", required=True) - - args = parser.parse_args() - backend = args.backend - url = args.url - if not url.endswith("/"): - url += "/" - try: - call_args = ["strato", "rm", "--backend", backend, "-m", "-r", url] - check_call(call_args) - print("Deleted {}".format(url)) - except: - print("Failed to delete {}.".format(url)) diff --git a/docker/geomxngs_fastq_to_dcc/src/rename-fastqs.py b/docker/geomxngs_fastq_to_dcc/src/rename-fastqs.py deleted file mode 100755 index 992ebb91..00000000 --- a/docker/geomxngs_fastq_to_dcc/src/rename-fastqs.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import argparse - -import pandas as pd - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument("--fastqs", help="FASTQ directory", required=True) - parser.add_argument("--rename", help="Mapping file", required=True) - args = parser.parse_args() - fastqs_dir = args.fastqs - rename = args.rename - if rename is not None: - df = pd.read_csv(rename, sep="\t", header=None, names=["original_name", "new_name"]) - df = df.dropna() - # strip path - df["original_name"] = df["original_name"].str.split("/").str[-1] - df["new_name"] = df["new_name"].str.split("/").str[-1] - - for i in range(len(df)): - d = df.iloc[i] - original_name = d["original_name"] - new_name = d["new_name"] - src = os.path.join(fastqs_dir, original_name) - dest = os.path.join(fastqs_dir, new_name) - if os.path.exists(src): - os.rename(src, dest) - else: - print(original_name + " not found") diff --git a/docker/geomxngs_fastq_to_dcc/src/update-cpu.py b/docker/geomxngs_fastq_to_dcc/src/update-cpu.py deleted file mode 100755 index c64a7a02..00000000 --- a/docker/geomxngs_fastq_to_dcc/src/update-cpu.py +++ /dev/null @@ -1,29 +0,0 @@ -import argparse -import configparser - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument("--ini", help="INI file", required=True) - parser.add_argument("--cpu", help="CPU", required=True) - parser.add_argument("--out", help="Output INI file", required=True) - args = parser.parse_args() - ini_path = args.ini - cpu = args.cpu - output_path = args.out - - config = configparser.ConfigParser() - config.optionxform = str # prevent conversion of keys to lowercase - config.read(ini_path) - processing_keys = ["Processing", "Processing_v2"] - found = False - for processing_key in processing_keys: - if processing_key in config: - config[processing_key]["threads"] = cpu - found = True - break - if not found: - raise ValueError("Processing section not found") - with open(output_path, "wt") as out: - config.write(out) diff --git a/workflows/geomxng/geomxngs_fastq_to_dcc.wdl b/workflows/geomxng/geomxngs_fastq_to_dcc.wdl index 99543243..4d18cbb5 100644 --- a/workflows/geomxng/geomxngs_fastq_to_dcc.wdl +++ b/workflows/geomxng/geomxngs_fastq_to_dcc.wdl @@ -20,7 +20,7 @@ workflow geomxngs_fastq_to_dcc { parameter_meta { ini:"Configuration file in INI format, containing pipeline processing parameters" - fastq_directory:"FASTQ directory URL (e.g. s3://foo/bar/fastqs or gs://foo/bar/fastqs)" + fastq_directory:"FASTQ directory URL (e.g. s3://foo/bar/fastqs or gs://foo/bar/fastqs). Separate multiple directories with a comma" output_directory:"URL to write results (e.g. s3://foo/bar/out or gs://foo/bar/out)" fastq_rename:"Optional 2 column TSV file with no header used to map original FASTQ names to FASTQ names that GeoMX recognizes" docker_registry :"Docker registry" @@ -84,21 +84,100 @@ task geomxngs_task { export DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 monitor_script.sh > monitoring.log & - # update cpus in ini fie - python /software/scripts/update-cpu.py --ini ~{ini} --cpu ~{cpu} --out local.ini - mkdir fastqs - strato sync --backend ~{backend} -m ~{fastq_directory} fastqs/ - if [[ '~{fastq_rename}' != '' ]]; then - python /software/scripts/rename-fastqs.py --fastqs fastqs --rename ~{fastq_rename} - fi + + python < 1: + local_dir = "fastqs" + for local_fastq_dir in local_fastq_dirs: + for f in os.listdir(local_fastq_dir): + file_name = os.path.basename(f) + dest = os.path.join(local_dir, file_name) + counter = 1 + while os.path.exists(dest): + name_tokens = file_name.split('_') + name_tokens[1] = name_tokens[1] + '-' + str(counter) + dest = os.path.join(local_dir, '_'.join(name_tokens)) + counter = counter + 1 + os.rename(os.path.join(local_fastq_dir, f), dest) + CODE geomx_expect.exp strato sync --backend ~{backend} -m results ~{output_directory_stripped} - if [[ '~{delete_fastq_directory}' = 'true' ]]; then - python /software/scripts/delete-url.py --backend ~{backend} --url ~{fastq_directory} - fi + python <