Skip to content

Commit

Permalink
Checking if allele IDs in the profiles are missing from the schema wh…
Browse files Browse the repository at this point in the history
…en creating the FASTA files used by the AlleleCallEvaluator module.
  • Loading branch information
rfm-targa committed Sep 9, 2024
1 parent 5ea008f commit 2f41727
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
28 changes: 22 additions & 6 deletions CHEWBBACA/AlleleCallEvaluator/evaluate_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


import os
import sys
import json
import pandas as pd

Expand Down Expand Up @@ -177,6 +178,7 @@ def profile_column_to_fasta(locus, schema_directory, allelic_profiles,
alleles = {k.split('_')[-1]: v for k, v in alleles.items()}

sequences = []
failed = [locus]
for i, allele in enumerate(locus_column):
clean_class = allele.split('INF-')[-1]
if clean_class in alleles:
Expand All @@ -185,11 +187,19 @@ def profile_column_to_fasta(locus, schema_directory, allelic_profiles,
record = fao.fasta_str_record(ct.FASTA_RECORD_TEMPLATE,
[f'{locus}_{current_sample}', seq])
sequences.append(record)

fasta_file = fo.join_paths(output_directory, [f'{locus}.fasta'])
fo.write_lines(sequences, fasta_file)

return fasta_file
# FASTA file does not contain the allele
# Will happen if alleles are not added to the schema during allele calling
# Using the --no-inferred option for allele calling leads to this
else:
print(f'Could not get allele {clean_class} for locus {locus}.')
failed.append(clean_class)

if len(failed) == 1:
fasta_file = fo.join_paths(output_directory, [f'{locus}.fasta'])
fo.write_lines(sequences, fasta_file)
return fasta_file
else:
return failed


def concatenate_loci_alignments(sample, loci, fasta_index, output_directory):
Expand All @@ -214,7 +224,7 @@ def concatenate_loci_alignments(sample, loci, fasta_index, output_directory):
try:
alignment += str(fasta_index[seqid].seq)
except Exception as e:
print(f'Could not get {sample} allele for locus {locus}.', e)
print(f'Could not get the aligned {locus} allele for sample {sample} to create the sample MSA.')
# Save alignment for sample
alignment_outfile = fo.join_paths(output_directory,
[f'{sample}_cgMLST_alignment.fasta'])
Expand Down Expand Up @@ -430,6 +440,12 @@ def main(input_files, schema_directory, output_directory, annotations,
cpu_cores,
show_progress=True)

# Check if FASTA files were created
missing_fastas = [r for r in results if type(r) == list]
if len(missing_fastas) > 0:
fo.delete_directory(output_directory)
sys.exit(ct.MISSING_ALLELES.format(len(missing_fastas)))

# Translate FASTA files
print('\nTranslating FASTA files...')
translation_inputs = im.divide_list_into_n_chunks(results, len(results))
Expand Down
5 changes: 5 additions & 0 deletions CHEWBBACA/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,8 @@
'before the first "." in the filename) cannot be interpreted as chain PDB IDs.')

# Error message reporting that the path to the input files does not exist.
MISSING_INPUT_ARG = ('Path to input files does not exist. Please provide a valid path.')

# Error message template reporting loci for which FASTA files could not be
# created; `{0}` is filled in with the number of affected loci.
# Each fragment ends with a space so the implicitly concatenated sentences
# do not run together.
MISSING_ALLELES = ('\nCould not create the FASTA files for {0} loci. '
                   'Some alleles are not in the schema\'s FASTA files. '
                   'Alleles are not added to the schema if the allele '
                   'calling process did not complete successfully or if '
                   'the --no-inferred option is used.')

0 comments on commit 2f41727

Please sign in to comment.