Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Speed up copy_local_genomes.py with symbolic links #181

Merged
merged 11 commits into from
Sep 18, 2022
2 changes: 1 addition & 1 deletion doc/configuring.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ curl -L https://osf.io/ckbq3/download -o outputs.private/abundtrim/podar.abundtr
and then confirm that the config file `conf-private.yml` has the following content:

```yaml
sample:
samples:
- podar

outdir: outputs.private/
Expand Down
26 changes: 23 additions & 3 deletions genome_grist/copy_local_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ def main():
p.add_argument('genome_files', nargs='+')
p.add_argument('-o', '--output-csv', required=True)
p.add_argument('-d', '--output-directory', required=True)
p.add_argument('--sym', required=False, action= 'store_true')
args = p.parse_args()

# Create the output directory if it does not already exist
mr-eyes marked this conversation as resolved.
Show resolved Hide resolved
try:
os.makedirs(args.output_directory)
except FileExistsError:
pass

output_fp = open(args.output_csv, 'wt')
w = csv.DictWriter(output_fp, fieldnames=['ident',
Expand Down Expand Up @@ -50,15 +57,28 @@ def main():

print(f"read identifer '{ident}' and name '{remainder}'")

destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz")

is_gzipped = False
with contextlib.suppress(OSError):
with gzip.open(filename) as fp:
fp.read(1)
is_gzipped = True

if is_gzipped:
destfile = os.path.join(args.output_directory, f"{ident}_genomic.fna.gz")


if args.sym and not is_gzipped:
print(f"--sym option requires the Fasta files to be gzipped first.", file= sys.stderr)
mr-eyes marked this conversation as resolved.
Show resolved Hide resolved
sys.exit(1)
mr-eyes marked this conversation as resolved.
Show resolved Hide resolved

if args.sym and is_gzipped:
print(f"symbolic linking '{filename}' to '{destfile}'")
_src = os.path.abspath(filename)
_dst = os.path.abspath(destfile)
if os.path.islink(_dst):
print(f"symlink {_dst} already exist, consider removing it first.", file= sys.stderr)
mr-eyes marked this conversation as resolved.
Show resolved Hide resolved
sys.exit(1)
os.symlink(_src, _dst)
elif is_gzipped:
print(f"copying '{filename}' to '{destfile}'")
shutil.copyfile(filename, destfile)
else:
Expand Down