Skip to content

Commit

Permalink
Added methods to write a conseq_all.csv file as per #549.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhliang committed Apr 15, 2020
1 parent 1dc5d53 commit ecf3bd0
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 11 deletions.
39 changes: 28 additions & 11 deletions micall/core/aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,18 +922,17 @@ def _create_consensus_writer(conseq_file):
'sequence'],
lineterminator=os.linesep)

def write_consensus_header(self, conseq_file):
    """ Build the consensus writer for conseq_file and write its header row.

    The writer comes from _create_consensus_writer() and is kept on
    self.conseq_writer.
    """
    writer = self._create_consensus_writer(conseq_file)
    self.conseq_writer = writer
    writer.writeheader()

def get_consensus_rows(self, seed_amino_entries):
for mixture_cutoff in self.conseq_mixture_cutoffs:
def get_consensus_rows(self, seed_amino_entries, ignore_coverage=False):
mixture_cutoffs = ([MAX_CUTOFF] if ignore_coverage
else self.conseq_mixture_cutoffs)
min_coverage = 1 if ignore_coverage else self.consensus_min_coverage
for mixture_cutoff in mixture_cutoffs:
consensus = ''
offset = None
for pos, seed_amino in seed_amino_entries:
for nuc_index, seed_nuc in enumerate(seed_amino.nucleotides):
nuc_coverage = seed_nuc.get_coverage()
if nuc_coverage < self.consensus_min_coverage:
if nuc_coverage < min_coverage:
if offset is not None:
consensus += 'x'
else:
Expand All @@ -952,13 +951,31 @@ def get_consensus_rows(self, seed_amino_entries):
'offset': offset,
'sequence': consensus}

def write_consensus(self, conseq_writer=None):
conseq_writer = conseq_writer or self.conseq_writer
def _write_consensus_helper(self, csv_writer, ignore_coverage=False):
    """ Write the consensus rows for the detail seed to a CSV writer.

    csv_writer: object whose writerow() receives each row dict. When it is
        None, self.conseq_all_writer is used if ignore_coverage is set,
        otherwise self.conseq_writer.
    ignore_coverage: passed through to get_consensus_rows().
    """
    # Compare against None explicitly so that a caller-supplied writer is
    # never replaced just because it happens to evaluate as false.
    if csv_writer is None:
        csv_writer = (self.conseq_all_writer if ignore_coverage
                      else self.conseq_writer)
    seed_amino_entries = [(seed_amino.consensus_nuc_index, seed_amino)
                          for seed_amino in self.seed_aminos[0]]
    rows = self.get_consensus_rows(seed_amino_entries,
                                   ignore_coverage=ignore_coverage)
    for row in rows:
        # Every row is labelled with the detail seed as its region.
        row['region'] = self.detail_seed
        csv_writer.writerow(row)

def write_consensus_header(self, conseq_file):
    """ Build the consensus writer for conseq_file and write its header row.

    The writer comes from _create_consensus_writer() and is kept on
    self.conseq_writer.
    """
    writer = self._create_consensus_writer(conseq_file)
    self.conseq_writer = writer
    writer.writeheader()

def write_consensus(self, conseq_writer=None):
    """ Write the consensus rows with the coverage check left enabled.

    conseq_writer: optional writer handed to _write_consensus_helper();
        None lets the helper pick its default writer.
    """
    self._write_consensus_helper(conseq_writer, ignore_coverage=False)

def write_consensus_all_header(self, conseq_all_file):
    """ Build the writer for conseq_all_file and write its header row.

    The writer comes from _create_consensus_writer() and is kept on
    self.conseq_all_writer.
    """
    writer = self._create_consensus_writer(conseq_all_file)
    self.conseq_all_writer = writer
    writer.writeheader()

def write_consensus_all(self, conseq_all_writer=None):
    """ Write the consensus rows with the coverage check switched off.

    conseq_all_writer: optional writer handed to _write_consensus_helper();
        None lets the helper pick its default writer.
    """
    self._write_consensus_helper(
        conseq_all_writer,
        ignore_coverage=True,
    )

def write_consensus_regions_header(self, conseq_region_file):
self.conseq_region_writer = csv.DictWriter(conseq_region_file,
Expand Down
129 changes: 129 additions & 0 deletions micall/tests/test_aln2counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,135 @@ def testConsensusLowCoverageAtEnd(self):

self.assertMultiLineEqual(expected_text, self.report_file.getvalue())

def testConsensusAllFromTwoReads(self):
    """ The minority read is outvoted by the majority read.

    Nine copies of AAATTT against one copy of CCCGGG leave AAATTT as the
    sequence reported at the MAX cutoff.
    """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,0,AAATTT
R1-seed,15,0,1,0,CCCGGG
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,0,AAATTT
"""

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllWithOffset(self):
    """ Overlapping reads at offsets 3 and 7 give one row with offset 3. """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,3,AAATTT
R1-seed,15,0,1,7,TTGGG
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,3,AAATTTGGG
"""

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllLowQualitySections(self):
    """ Low-quality bases inside the sequence are still reported as x.

    Leading N bases are left out, which moves the reported offset to 6.
    """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,3,NNNTTT
R1-seed,15,0,1,7,TTNGG
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,6,TTTxGG
"""
    report.consensus_min_coverage = 1

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllLowQuality(self):
    """ Reads made up entirely of N yield the header and no data rows. """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,3,NNNNNN
R1-seed,15,0,1,7,NNNNN
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
"""
    report.consensus_min_coverage = 1

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllLowCoverageInMiddle(self):
    """ A dip below consensus_min_coverage in the middle is not masked.

    The middle bases are covered by 9 reads while the minimum is set to
    10, yet the whole sequence is expected in the output.
    """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,0,AAATTTGGG
R1-seed,15,0,1,0,AAAT
R1-seed,15,0,1,6,GGG
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,0,AAATTTGGG
"""
    report.consensus_min_coverage = 10

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllLowCoverageAtStart(self):
    """ A start covered by fewer reads than the minimum is still reported.

    The first bases are covered by 9 reads while the minimum is set to
    10, yet the whole sequence is expected with offset 0.
    """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,0,AAATTTGGG
R1-seed,15,0,1,4,TTGGG
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,0,AAATTTGGG
"""
    report.consensus_min_coverage = 10

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testConsensusAllLowCoverageAtEnd(self):
    """ An end covered by fewer reads than the minimum is still reported.

    The last bases are covered by 9 reads while the minimum is set to
    10, yet the whole sequence is expected in the output.
    """
    report = self.report
    # refname,qcut,rank,count,offset,seq
    reads = prepare_reads("""\
R1-seed,15,0,9,0,AAATTTGGG
R1-seed,15,0,1,0,AAAT
""")
    expected = """\
region,q-cutoff,consensus-percent-cutoff,offset,sequence
R1-seed,15,MAX,0,AAATTTGGG
"""
    report.consensus_min_coverage = 10

    report.write_consensus_all_header(self.report_file)
    report.read(reads)
    report.write_consensus_all()

    self.assertMultiLineEqual(expected, self.report_file.getvalue())

def testMultiplePrefixAminoReport(self):
""" Assemble counts from three contigs to two references.
Expand Down

0 comments on commit ecf3bd0

Please sign in to comment.