From fee1aeead3d7cb601f223c8164587328f4251438 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Fri, 4 Mar 2022 13:49:26 -0800 Subject: [PATCH 01/10] fix private genomes config file --- doc/configuring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/configuring.md b/doc/configuring.md index 6062916..f370af3 100644 --- a/doc/configuring.md +++ b/doc/configuring.md @@ -242,7 +242,7 @@ curl -L https://osf.io/ckbq3/download -o outputs.private/abundtrim/podar.abundtr and then confirm that the config file `conf-private.yml` has the following content: ```yaml -sample: +samples: - podar outdir: outputs.private/ From 2f2230b5471098364a09232f81c7800fda7a6091 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Fri, 4 Mar 2022 14:19:47 -0800 Subject: [PATCH 02/10] :rocket: speedup with symlinks --- genome_grist/copy_local_genomes.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index 0085c6b..626a47a 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -11,13 +11,21 @@ import shutil import gzip import contextlib +from pathlib import Path def main(): p = argparse.ArgumentParser() p.add_argument('genome_files', nargs='+') p.add_argument('-o', '--output-csv', required=True) p.add_argument('-d', '--output-directory', required=True) + p.add_argument('--sym', required=False, action= 'store_true') args = p.parse_args() + + # Create directories if not exist + try: + os.makedirs(args.output_directory) + except FileExistsError: + pass output_fp = open(args.output_csv, 'wt') w = csv.DictWriter(output_fp, fieldnames=['ident', @@ -50,15 +58,24 @@ def main(): print(f"read identifer '{ident}' and name '{remainder}'") - destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz") - is_gzipped = False with contextlib.suppress(OSError): with gzip.open(filename) as fp: fp.read(1) is_gzipped = True 
- if is_gzipped: + destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna") + destfile = f"{destfile}.gz" if is_gzipped else destfile + + if args.sym: + print(f"symbolic linking '{filename}' to '{destfile}'") + _src = os.path.abspath(filename) + _dst = os.path.abspath(destfile) + if os.path.islink(_dst): + print(f"symlink {_dst} already exist, consider removing it first.", file= sys.stderr) + sys.exit(1) + os.symlink(_src, _dst) + elif is_gzipped: print(f"copying '{filename}' to '{destfile}'") shutil.copyfile(filename, destfile) else: From 07d520a33e1a1cbdd1b20ff7691903236acb8a9a Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Fri, 4 Mar 2022 14:22:56 -0800 Subject: [PATCH 03/10] remove unused import --- genome_grist/copy_local_genomes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index 626a47a..2664746 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -11,7 +11,6 @@ import shutil import gzip import contextlib -from pathlib import Path def main(): p = argparse.ArgumentParser() From c44b930a00ba4d935e6381cae548024f26165f1b Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Fri, 4 Mar 2022 14:48:21 -0800 Subject: [PATCH 04/10] symlink only works with gzipped files --- genome_grist/copy_local_genomes.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index 2664746..86bba04 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -63,10 +63,9 @@ def main(): fp.read(1) is_gzipped = True - destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna") - destfile = f"{destfile}.gz" if is_gzipped else destfile + destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz") - if args.sym: + if args.sym and is_gzipped: print(f"symbolic linking '{filename}' to '{destfile}'") _src = 
os.path.abspath(filename) _dst = os.path.abspath(destfile) From a18a71642e7c5c88f93efd309520f62a618cfb79 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Fri, 4 Mar 2022 14:52:23 -0800 Subject: [PATCH 05/10] add user message if used --sym with non gzipped files --- genome_grist/copy_local_genomes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index 86bba04..e579a5f 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -65,6 +65,11 @@ def main(): destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz") + + if args.sym and not is_gzipped: + print(f"--sym option requires the Fasta files to be gzipped first.", file= sys.stderr) + sys.exit(1) + if args.sym and is_gzipped: print(f"symbolic linking '{filename}' to '{destfile}'") _src = os.path.abspath(filename) From 0feb472d588773fbcd9f636100641f35e5a5eb24 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Sat, 5 Mar 2022 18:57:07 +0200 Subject: [PATCH 06/10] Update genome_grist/copy_local_genomes.py Co-authored-by: C. Titus Brown --- genome_grist/copy_local_genomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index e579a5f..f203c20 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -67,7 +67,7 @@ def main(): if args.sym and not is_gzipped: - print(f"--sym option requires the Fasta files to be gzipped first.", file= sys.stderr) + print("--sym option requires the FASTA files to be already gzipped.", file=sys.stderr) sys.exit(1) if args.sym and is_gzipped: From d0edff53992e56d5c1c88da3ac95d16fcb64df75 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Sat, 5 Mar 2022 18:57:18 +0200 Subject: [PATCH 07/10] Update genome_grist/copy_local_genomes.py Co-authored-by: C. 
Titus Brown --- genome_grist/copy_local_genomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index f203c20..f45e89e 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -75,7 +75,7 @@ def main(): _src = os.path.abspath(filename) _dst = os.path.abspath(destfile) if os.path.islink(_dst): - print(f"symlink {_dst} already exist, consider removing it first.", file= sys.stderr) + print(f"symlink {_dst} already exists, consider removing it first.", file=sys.stderr) sys.exit(1) os.symlink(_src, _dst) elif is_gzipped: From c6405f88f1febebf9c1dc37d9e0d450500888f06 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Sat, 5 Mar 2022 18:57:25 +0200 Subject: [PATCH 08/10] Update genome_grist/copy_local_genomes.py Co-authored-by: C. Titus Brown --- genome_grist/copy_local_genomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index f45e89e..b32527a 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -68,7 +68,7 @@ def main(): if args.sym and not is_gzipped: print("--sym option requires the FASTA files to be already gzipped.", file=sys.stderr) - sys.exit(1) + sys.exit(-1) if args.sym and is_gzipped: print(f"symbolic linking '{filename}' to '{destfile}'") From 2f5ab1cc4a356b252a0ed350bb7665317a54a90d Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Sat, 5 Mar 2022 18:57:36 +0200 Subject: [PATCH 09/10] Update genome_grist/copy_local_genomes.py Co-authored-by: C. 
Titus Brown --- genome_grist/copy_local_genomes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genome_grist/copy_local_genomes.py b/genome_grist/copy_local_genomes.py index b32527a..29c5996 100644 --- a/genome_grist/copy_local_genomes.py +++ b/genome_grist/copy_local_genomes.py @@ -20,7 +20,7 @@ def main(): p.add_argument('--sym', required=False, action= 'store_true') args = p.parse_args() - # Create directories if not exist + # Create directory if does not exist try: os.makedirs(args.output_directory) except FileExistsError: From 94560604ee038a37d84dc3f1a09c9fce94500aa7 Mon Sep 17 00:00:00 2001 From: Mohamed Abuelanin Date: Sat, 5 Mar 2022 09:04:34 -0800 Subject: [PATCH 10/10] Revert "fix private genomes config file" This reverts commit fee1aeead3d7cb601f223c8164587328f4251438. --- doc/configuring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/configuring.md b/doc/configuring.md index f370af3..6062916 100644 --- a/doc/configuring.md +++ b/doc/configuring.md @@ -242,7 +242,7 @@ curl -L https://osf.io/ckbq3/download -o outputs.private/abundtrim/podar.abundtr and then confirm that the config file `conf-private.yml` has the following content: ```yaml -samples: +sample: - podar outdir: outputs.private/