Skip to content

Commit

Permalink
Avoid blank reads, as part of #442.
Browse files Browse the repository at this point in the history
Blank reads caused a TypeError in iva assembly.
  • Loading branch information
donkirkby committed Oct 29, 2018
1 parent e1903d6 commit 41d7a18
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 9 deletions.
3 changes: 2 additions & 1 deletion micall/core/denovo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from glob import glob
from io import StringIO
from shutil import rmtree
from subprocess import Popen, call
from subprocess import Popen
from tempfile import mkdtemp

from Bio import SeqIO
Expand Down Expand Up @@ -142,6 +142,7 @@ def denovo(fastq1_path, fastq2_path, contigs, work_dir='.', merged_contigs_csv=N


if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('fastq1')
parser.add_argument('fastq2')
Expand Down
1 change: 1 addition & 0 deletions micall/core/trim_fastqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def trim(original_fastq_filenames,
'-o', trimmed_fastq_filenames[0],
'-p', trimmed_fastq_filenames[1],
'--quiet',
'--minimum-length', '1',
censored_filenames[0],
censored_filenames[1]]
cut_adapt.check_output(cutadapt_args)
Expand Down
2 changes: 1 addition & 1 deletion micall/tests/microtest/make_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def main():
ref_nuc_section = ''.join(ref_nuc_section)
if fastq_file.is_reversed:
ref_nuc_section = reverse_and_complement(ref_nuc_section)
phred_scores = 'A' * (ref_end-ref_start)
phred_scores = 'A' * len(ref_nuc_section)
file_num = '2' if fastq_file.is_reversed else '1'
for cluster in range(section.count):
f.write('@M01234:01:000000000-AAAAA:1:1101:{}:{:04} {}:N:0:1\n'.format(
Expand Down
25 changes: 18 additions & 7 deletions micall/utils/denovo_simplify.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def parse_args():


class MicallDD(DD):
test_names = ('one_contig', 'multiple_genotypes')
test_names = ('one_contig', 'multiple_genotypes', 'type_error')

def __init__(self,
filename1,
Expand Down Expand Up @@ -65,7 +65,6 @@ def __init__(self,

def _test(self, read_indexes):
read_count = len(read_indexes)
read_indexes = reversed(read_indexes)
self.write_simple_fastq(self.simple1, self.simple2, read_indexes)
workdir = os.path.dirname(self.simple1)
os.chdir(workdir)
Expand All @@ -77,30 +76,35 @@ def _test(self, read_indexes):
self.bad_cycles_filename,
(trimmed_filename1, trimmed_filename2),
use_gzip=False)
exception = None
# noinspection PyBroadException
try:
denovo(trimmed_filename1, trimmed_filename2, contigs_csv, workdir)
except Exception:
except Exception as ex:
logger.warning('Assembly failed.', exc_info=True)
return DD.UNRESOLVED
exception = ex
contigs_csv.seek(0)

result = self.get_result(contigs_csv, read_count)
result = self.get_result(contigs_csv, read_count, exception)
if result == DD.FAIL:
os.rename(self.simple1, self.best1)
os.rename(self.simple2, self.best2)
return result

@staticmethod
def check_one_contig(contigs_csv, read_count):
def check_one_contig(contigs_csv, read_count, exception):
if exception is not None:
return DD.UNRESOLVED
contig_count = len(contigs_csv.readlines())
logger.debug('Result: %d contigs from %d reads.',
contig_count,
read_count)
return DD.FAIL if contig_count == 1 else DD.PASS

@staticmethod
def check_multiple_genotypes(contigs_csv, read_count):
def check_multiple_genotypes(contigs_csv, read_count, exception):
if exception is not None:
return DD.UNRESOLVED
reader = DictReader(contigs_csv)
genotypes = sorted({row['genotype'] for row in reader})
genotype_count = len(genotypes)
Expand All @@ -110,6 +114,13 @@ def check_multiple_genotypes(contigs_csv, read_count):
genotypes)
return DD.FAIL if genotype_count > 2 else DD.PASS

@staticmethod
def check_type_error(_contigs_csv, read_count, exception):
    """ Report whether the assembly attempt ended in a TypeError.

    :param _contigs_csv: unused; accepted so the signature matches the
        other check_* methods
    :param int read_count: number of reads in the attempt, only used in
        the debug log message
    :param exception: the exception caught during assembly, or None
    :return: DD.FAIL when exception is a TypeError, otherwise DD.PASS
        (this includes the case where exception is None)
    """
    logger.debug('Result: %s exception from %d reads.',
                 exception,
                 read_count)
    if isinstance(exception, TypeError):
        return DD.FAIL
    return DD.PASS

def write_simple_fastq(self, filename1, filename2, read_indexes):
selected_reads = (self.reads[i] for i in read_indexes)
with open(filename1, 'w') as f1, open(filename2, 'w') as f2:
Expand Down

0 comments on commit 41d7a18

Please sign in to comment.