Skip to content

Commit

Permalink
Update to work with MUSCLE v5
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Nov 19, 2021
1 parent 87953f2 commit 8e475a8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
22 changes: 14 additions & 8 deletions trycycler/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .log import log, section_header, explanation
from .misc import load_fasta, count_substrings, get_sequence_file_type
from .software import check_muscle
from .software import check_muscle, get_muscle_version


def msa(args):
Expand Down Expand Up @@ -84,30 +84,36 @@ def run_muscle_all_pieces(temp_dir: pathlib.Path, threads):
explanation('Trycycler now runs Muscle on each of the pieces to turn them into multiple '
'sequence alignments.')
fasta_files = sorted(temp_dir.glob('*.fasta'))
muscle_version = get_muscle_version()

filenames = []
parameters = []
for f in fasta_files:
input_fasta = str(f)
output_filename = input_fasta.replace('.fasta', '_msa.fasta')
filenames.append((input_fasta, output_filename))
parameters.append((input_fasta, output_filename, muscle_version))
i = 0
if threads == 1:
for f in filenames:
for f in parameters:
run_muscle_one_piece(f)
i += 1
log(f'\rpieces: {i}', end='')
else:
with multiprocessing.Pool(threads) as pool:
for _ in pool.imap_unordered(run_muscle_one_piece, filenames):
for _ in pool.imap_unordered(run_muscle_one_piece, parameters):
i += 1
log(f'\rpieces: {i}', end='')
log('\n')


def run_muscle_one_piece(parameters):
    """
    Runs MUSCLE on a single piece to produce a multiple sequence alignment.

    The arguments are packed into one tuple so this function can be passed directly to a
    multiprocessing pool's imap_unordered:
      parameters[0]: filename of the input FASTA
      parameters[1]: filename for the MSA output (expected to end in '_msa.fasta')
      parameters[2]: MUSCLE version string (must start with '3' or '5')

    MUSCLE's stderr is saved to a file named like the output but with '_msa.fasta' replaced by
    '_muscle.out'. MUSCLE's exit status is not checked.

    Raises AssertionError if the MUSCLE version is neither v3 nor v5.
    """
    input_filename, output_filename, muscle_version = parameters
    muscle_output_filename = output_filename.replace('_msa.fasta', '_muscle.out')

    # MUSCLE v3 and v5 use different option names for the input and output files.
    if muscle_version.startswith('3'):
        muscle_command = ['muscle', '-in', input_filename, '-out', output_filename]
    elif muscle_version.startswith('5'):
        muscle_command = ['muscle', '-align', input_filename, '-output', output_filename]
    else:
        # Raised explicitly instead of using 'assert False': assert statements are removed when
        # Python runs with -O, which would leave muscle_command undefined below.
        raise AssertionError(f'unsupported MUSCLE version: {muscle_version}')

    with open(muscle_output_filename, 'wt') as muscle_output:
        subprocess.run(muscle_command, stderr=muscle_output)

Expand Down
12 changes: 12 additions & 0 deletions trycycler/software.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ def check_muscle():
output = output.decode().strip()
version = parse_muscle_version(output)
log(f' MUSCLE: v{version}')
if not version.startswith('3') and not version.startswith('5'):
sys.exit('\nError: either MUSCLE v3 or MUSCLE v5 is required')


def get_muscle_version():
    """
    Runs 'muscle -version' and returns the version as parsed by parse_muscle_version.

    Exceptions are deliberately not caught here, because the check_muscle function is assumed to
    have already been run.
    """
    version_command = ['muscle', '-version']
    raw_output = subprocess.check_output(version_command, stderr=subprocess.STDOUT)
    return parse_muscle_version(raw_output.decode().strip())


def parse_muscle_version(output):
Expand Down

0 comments on commit 8e475a8

Please sign in to comment.