From 1987f45b8ef5538df9ff17cfaf355b20fe169108 Mon Sep 17 00:00:00 2001 From: donkirkby Date: Wed, 17 Oct 2018 10:30:28 -0700 Subject: [PATCH] Convert some aln2counts tests to pytest as part of #442. --- micall/tests/test_aln2counts.py | 344 ------------------ micall/tests/test_aln2counts_seed_amino.py | 154 ++++++++ .../tests/test_aln2counts_seed_nucleotide.py | 239 ++++++++++++ 3 files changed, 393 insertions(+), 344 deletions(-) create mode 100644 micall/tests/test_aln2counts_seed_amino.py create mode 100644 micall/tests/test_aln2counts_seed_nucleotide.py diff --git a/micall/tests/test_aln2counts.py b/micall/tests/test_aln2counts.py index 2257d8c19..277612056 100644 --- a/micall/tests/test_aln2counts.py +++ b/micall/tests/test_aln2counts.py @@ -2376,347 +2376,3 @@ def testUnsortedInserts(self): self.writer.write(inserts=(9, 6), region='R1') self.assertMultiLineEqual(expected_text, self.insert_file.getvalue()) - - -class SeedAminoTest(unittest.TestCase): - def setUp(self): - self.amino = SeedAmino(None) - - def testSingleRead(self): - """ Read a single codon, and report on counts. - Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* - """ - nuc_seq = 'AAA' # -> K - expected_counts = '0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0' - - self.amino.count_aminos(nuc_seq, 8) - counts = self.amino.get_report() - - self.assertSequenceEqual(expected_counts, counts) - - def testDifferentCodon(self): - """ Read two different codons, and report on counts. - Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* - """ - nuc_seq1 = 'AAA' # -> K - nuc_seq2 = 'GGG' # -> G - expected_counts = '0,0,0,0,0,5,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0' - - self.amino.count_aminos(nuc_seq1, 8) - self.amino.count_aminos(nuc_seq2, 5) - counts = self.amino.get_report() - - self.assertSequenceEqual(expected_counts, counts) - - def testSameAminoAcid(self): - """ Read same codon twice, and report on counts. - Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* - """ - nuc_seq1 = 'AAA' # -> K - nuc_seq2 = 'AAG' # -> K - expected_counts = '0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0' - - self.amino.count_aminos(nuc_seq1, 4) - self.amino.count_aminos(nuc_seq2, 5) - counts = self.amino.get_report() - - self.assertSequenceEqual(expected_counts, counts) - - def testNucleotides(self): - nuc_seq1 = 'AAA' # -> K - nuc_seq2 = 'AAG' # -> K - expected_nuc_counts = '4,0,5,0' - - self.amino.count_aminos(nuc_seq1, 4) - self.amino.count_aminos(nuc_seq2, 5) - counts = self.amino.nucleotides[2].get_report() - - self.assertSequenceEqual(expected_nuc_counts, counts) - - def testConsensus(self): - nuc_seq1 = 'AAA' # -> K - nuc_seq2 = 'GGG' # -> G - expected_consensus = 'G' - - self.amino.count_aminos(nuc_seq1, 4) - self.amino.count_aminos(nuc_seq2, 5) - consensus = self.amino.get_consensus() - - self.assertSequenceEqual(expected_consensus, consensus) - - def testConsensusMixture(self): - nuc_seq1 = 'AAA' # -> K - nuc_seq2 = 'GGG' # -> G - nuc_seq3 = 'TTT' # -> F - allowed_consensus_values = ('G', 'K') - - self.amino.count_aminos(nuc_seq1, 4) - self.amino.count_aminos(nuc_seq2, 4) - self.amino.count_aminos(nuc_seq3, 3) - consensus = self.amino.get_consensus() - - self.assertIn(consensus, allowed_consensus_values) - - def testConsensusWithNoReads(self): - consensus = self.amino.get_consensus() - - self.assertEqual(consensus, '-') - - def testMissingData(self): - """ Lower-case n represents a gap between the forward and reverse reads. """ - - nuc_seq = 'CTn' - expected_consensus = '-' - - self.amino.count_aminos(nuc_seq, 1) - consensus = self.amino.get_consensus() - - self.assertEqual(expected_consensus, consensus) - - def testAmbiguousData(self): - """If a read is ambiguous, don't count it toward consensus.""" - - nuc_seq1 = 'Cnn' # -> ? - nuc_seq2 = 'AAA' # -> K - expected_consensus = 'K' - - self.amino.count_aminos(nuc_seq1, 9) - self.amino.count_aminos(nuc_seq2, 1) - consensus = self.amino.get_consensus() - - self.assertEqual(expected_consensus, consensus) - - def testOverlap(self): - self.amino.count_aminos('GGG', 4) - other = SeedAmino(consensus_nuc_index=7) - other.count_aminos('TAG', 5) - expected_counts = {'G': 4} - expected_v3_overlap = 5 - - self.amino.count_overlap(other) - - self.assertEqual(expected_counts, self.amino.counts) - self.assertEqual(expected_v3_overlap, self.amino.v3_overlap) - self.assertEqual(expected_v3_overlap, - self.amino.nucleotides[0].v3_overlap) - - def testOverlapPartialCodon(self): - self.amino.count_aminos('GGG', 4) - other = SeedAmino(consensus_nuc_index=7) - other.count_aminos('TA', 5) - expected_counts = {'G': 4} - expected_v3_overlap = 5 - - self.amino.count_overlap(other) - - self.assertEqual(expected_counts, self.amino.counts) - self.assertEqual(expected_v3_overlap, self.amino.v3_overlap) - self.assertEqual(expected_v3_overlap, - self.amino.nucleotides[0].v3_overlap) - - -class SeedNucleotideTest(unittest.TestCase): - def setUp(self): - self.nuc = SeedNucleotide() - - def testSingleRead(self): - """ Read a single nucleotide, and report on counts. - Columns are: A,C,G,T - """ - nuc_seq = 'C' - expected_counts = '0,8,0,0' - - self.nuc.count_nucleotides(nuc_seq, 8) - counts = self.nuc.get_report() - - self.assertSequenceEqual(expected_counts, counts) - - def testConsensusNoMixes(self): - self.nuc.count_nucleotides('C', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus = 'C' - self.assertEqual(expected_consensus, consensus_max) - self.assertEqual(expected_consensus, consensus_mix) - - def testConsensusMixed(self): - self.nuc.count_nucleotides('C', 2) - self.nuc.count_nucleotides('T', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = 'C' - expected_consensus_mix = 'Y' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusMixedThree(self): - self.nuc.count_nucleotides('C', 2) - self.nuc.count_nucleotides('T', 1) - self.nuc.count_nucleotides('G', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = 'C' - expected_consensus_mix = 'B' # B is a mix of T, G, and C - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusMixedAll(self): - self.nuc.count_nucleotides('C', 2) - self.nuc.count_nucleotides('T', 1) - self.nuc.count_nucleotides('G', 1) - self.nuc.count_nucleotides('A', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = 'C' - expected_consensus_mix = 'N' # All four are reported as N - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusMixedMax(self): - self.nuc.count_nucleotides('C', 2) - self.nuc.count_nucleotides('T', 2) - self.nuc.count_nucleotides('G', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = 'Y' # C and T tie for max, mix is Y - expected_consensus_mix = 'B' # C, T, and G mix is B - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusCutoff(self): - self.nuc.count_nucleotides('C', 2) - self.nuc.count_nucleotides('T', 1) - consensus_mix = self.nuc.get_consensus(0.5) - - expected_consensus = 'C' # T was below the cutoff - self.assertEqual(expected_consensus, consensus_mix) - - def testConsensusCutoffAtBoundary(self): - self.nuc.count_nucleotides('C', 9000) - self.nuc.count_nucleotides('T', 1000) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus = 'Y' # T was at the cutoff - self.assertEqual(expected_consensus, consensus_mix) - - def testConsensusCutoffBelowBoundary(self): - self.nuc.count_nucleotides('C', 9001) - self.nuc.count_nucleotides('T', 999) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus = 'C' # T was below the cutoff - self.assertEqual(expected_consensus, consensus_mix) - - def testConsensusMixedWithPoorQuality(self): - self.nuc.count_nucleotides('N', 99) - self.nuc.count_nucleotides('T', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix_one_pct = self.nuc.get_consensus(0.01) - consensus_mix_ten_pct = self.nuc.get_consensus(0.10) - - expected_consensus_max = 'T' # N always overruled - expected_consensus_mix_one_pct = 'T' - expected_consensus_mix_ten_pct = 'T' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix_one_pct, consensus_mix_one_pct) - self.assertEqual(expected_consensus_mix_ten_pct, consensus_mix_ten_pct) - - def testConsensusMixedWithGap(self): - self.nuc.count_nucleotides('-', 99) - self.nuc.count_nucleotides('T', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix_one_pct = self.nuc.get_consensus(0.01) - consensus_mix_ten_pct = self.nuc.get_consensus(0.10) - - expected_consensus_max = '-' # most common - expected_consensus_mix_one_pct = 't' # mix of both - expected_consensus_mix_ten_pct = '-' # only deletions - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix_one_pct, consensus_mix_one_pct) - self.assertEqual(expected_consensus_mix_ten_pct, consensus_mix_ten_pct) - - def testConsensusMixedWithGapAndPoorQuality(self): - self.nuc.count_nucleotides('N', 3) - self.nuc.count_nucleotides('-', 2) - self.nuc.count_nucleotides('T', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = '-' - expected_consensus_mix = 't' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusPoorQualityOnly(self): - self.nuc.count_nucleotides('N', 1) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = 'N' - expected_consensus_mix = 'N' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusMixedGapAndPoorQualityOnly(self): - self.nuc.count_nucleotides('N', 3) - self.nuc.count_nucleotides('-', 2) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus_max = '-' - expected_consensus_mix = '-' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusAllBelowCutoff(self): - self.nuc.count_nucleotides('C', 101) - self.nuc.count_nucleotides('T', 100) - self.nuc.count_nucleotides('G', 99) - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.5) - - expected_consensus_max = 'C' - expected_consensus_mix = 'N' - self.assertEqual(expected_consensus_max, consensus_max) - self.assertEqual(expected_consensus_mix, consensus_mix) - - def testConsensusBetweenReads(self): - """Lower-case n represents the gap between forward and reverse reads. - - Should not be counted in consensus totals""" - self.nuc.count_nucleotides('C', 9) - self.nuc.count_nucleotides('T', 1) - self.nuc.count_nucleotides('n', 2) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus = 'Y' - self.assertEqual(expected_consensus, consensus_mix) - - def testConsensusMissingPositions(self): - """ Positions that are never read are ignored in the consensus. """ - - # No counts added - - consensus_max = self.nuc.get_consensus(MAX_CUTOFF) - consensus_mix = self.nuc.get_consensus(0.1) - - expected_consensus = '' - self.assertEqual(expected_consensus, consensus_max) - self.assertEqual(expected_consensus, consensus_mix) - - def testOverlap(self): - self.nuc.count_nucleotides('T', 4) - other = SeedNucleotide() - other.count_nucleotides('C', 5) - expected_counts = {'T': 4} - expected_v3_overlap = 5 - - self.nuc.count_overlap(other) - - self.assertEqual(expected_counts, self.nuc.counts) - self.assertEqual(expected_v3_overlap, self.nuc.v3_overlap) diff --git a/micall/tests/test_aln2counts_seed_amino.py b/micall/tests/test_aln2counts_seed_amino.py new file mode 100644 index 000000000..48746387d --- /dev/null +++ b/micall/tests/test_aln2counts_seed_amino.py @@ -0,0 +1,154 @@ +from micall.core.aln2counts import SeedAmino + + +def test_single_read(): + """ Read a single codon, and report on counts. + Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* + """ + nuc_seq = 'AAA' # -> K + expected_counts = '0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq, 8) + counts = amino.get_report() + + assert expected_counts == counts + + +def test_different_codon(): + """ Read two different codons, and report on counts. + Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* + """ + nuc_seq1 = 'AAA' # -> K + nuc_seq2 = 'GGG' # -> G + expected_counts = '0,0,0,0,0,5,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 8) + amino.count_aminos(nuc_seq2, 5) + counts = amino.get_report() + + assert expected_counts == counts + + +def test_same_amino_acid(): + """ Read same codon twice, and report on counts. + Columns are: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,* + """ + nuc_seq1 = 'AAA' # -> K + nuc_seq2 = 'AAG' # -> K + expected_counts = '0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 4) + amino.count_aminos(nuc_seq2, 5) + counts = amino.get_report() + + assert expected_counts == counts + + +def test_nucleotides(): + nuc_seq1 = 'AAA' # -> K + nuc_seq2 = 'AAG' # -> K + expected_nuc_counts = '4,0,5,0' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 4) + amino.count_aminos(nuc_seq2, 5) + counts = amino.nucleotides[2].get_report() + + assert expected_nuc_counts == counts + + +def test_consensus(): + nuc_seq1 = 'AAA' # -> K + nuc_seq2 = 'GGG' # -> G + expected_consensus = 'G' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 4) + amino.count_aminos(nuc_seq2, 5) + consensus = amino.get_consensus() + + assert expected_consensus == consensus + + +def test_consensus_mixture(): + nuc_seq1 = 'AAA' # -> K + nuc_seq2 = 'GGG' # -> G + nuc_seq3 = 'TTT' # -> F + allowed_consensus_values = ('G', 'K') + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 4) + amino.count_aminos(nuc_seq2, 4) + amino.count_aminos(nuc_seq3, 3) + consensus = amino.get_consensus() + + assert consensus in allowed_consensus_values + + +def test_consensus_with_no_reads(): + amino = SeedAmino(None) + + consensus = amino.get_consensus() + + assert consensus == '-' + + +def test_missing_data(): + """ Lower-case n represents a gap between the forward and reverse reads. """ + + nuc_seq = 'CTn' + expected_consensus = '-' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq, 1) + consensus = amino.get_consensus() + + assert expected_consensus == consensus + + +def test_ambiguous_data(): + """If a read is ambiguous, don't count it toward consensus.""" + + nuc_seq1 = 'Cnn' # -> ? + nuc_seq2 = 'AAA' # -> K + expected_consensus = 'K' + amino = SeedAmino(None) + + amino.count_aminos(nuc_seq1, 9) + amino.count_aminos(nuc_seq2, 1) + consensus = amino.get_consensus() + + assert expected_consensus == consensus + + +def test_overlap(): + amino = SeedAmino(None) + amino.count_aminos('GGG', 4) + other = SeedAmino(consensus_nuc_index=7) + other.count_aminos('TAG', 5) + expected_counts = {'G': 4} + expected_v3_overlap = 5 + + amino.count_overlap(other) + + assert expected_counts == amino.counts + assert expected_v3_overlap == amino.v3_overlap + assert expected_v3_overlap == amino.nucleotides[0].v3_overlap + + +def test_overlap_partial_codon(): + amino = SeedAmino(None) + amino.count_aminos('GGG', 4) + other = SeedAmino(consensus_nuc_index=7) + other.count_aminos('TA', 5) + expected_counts = {'G': 4} + expected_v3_overlap = 5 + + amino.count_overlap(other) + + assert expected_counts == amino.counts + assert expected_v3_overlap == amino.v3_overlap + assert expected_v3_overlap == amino.nucleotides[0].v3_overlap diff --git a/micall/tests/test_aln2counts_seed_nucleotide.py b/micall/tests/test_aln2counts_seed_nucleotide.py new file mode 100644 index 000000000..69bba269b --- /dev/null +++ b/micall/tests/test_aln2counts_seed_nucleotide.py @@ -0,0 +1,239 @@ +from micall.core.aln2counts import SeedNucleotide, MAX_CUTOFF + + +def test_single_read(): + """ Read a single nucleotide, and report on counts. + Columns are: A,C,G,T + """ + nuc_seq = 'C' + expected_counts = '0,8,0,0' + nuc = SeedNucleotide() + + nuc.count_nucleotides(nuc_seq, 8) + counts = nuc.get_report() + + assert expected_counts == counts + + +def test_consensus_no_mixes(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus = 'C' + assert expected_consensus == consensus_max + assert expected_consensus == consensus_mix + + +def test_consensus_mixed(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 2) + nuc.count_nucleotides('T', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = 'C' + expected_consensus_mix = 'Y' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_mixed_three(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 2) + nuc.count_nucleotides('T', 1) + nuc.count_nucleotides('G', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = 'C' + expected_consensus_mix = 'B' # B is a mix of T, G, and C + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_mixed_all(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 2) + nuc.count_nucleotides('T', 1) + nuc.count_nucleotides('G', 1) + nuc.count_nucleotides('A', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = 'C' + expected_consensus_mix = 'N' # All four are reported as N + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_mixed_max(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 2) + nuc.count_nucleotides('T', 2) + nuc.count_nucleotides('G', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = 'Y' # C and T tie for max, mix is Y + expected_consensus_mix = 'B' # C, T, and G mix is B + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_cutoff(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 2) + nuc.count_nucleotides('T', 1) + consensus_mix = nuc.get_consensus(0.5) + + expected_consensus = 'C' # T was below the cutoff + assert expected_consensus == consensus_mix + + +def test_consensus_cutoff_at_boundary(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 9000) + nuc.count_nucleotides('T', 1000) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus = 'Y' # T was at the cutoff + assert expected_consensus == consensus_mix + + +def test_consensus_cutoff_below_boundary(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 9001) + nuc.count_nucleotides('T', 999) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus = 'C' # T was below the cutoff + assert expected_consensus == consensus_mix + + +def test_consensus_mixed_with_poor_quality(): + nuc = SeedNucleotide() + nuc.count_nucleotides('N', 99) + nuc.count_nucleotides('T', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix_one_pct = nuc.get_consensus(0.01) + consensus_mix_ten_pct = nuc.get_consensus(0.10) + + expected_consensus_max = 'T' # N always overruled + expected_consensus_mix_one_pct = 'T' + expected_consensus_mix_ten_pct = 'T' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix_one_pct == consensus_mix_one_pct + assert expected_consensus_mix_ten_pct == consensus_mix_ten_pct + + +def test_consensus_mixed_with_gap(): + nuc = SeedNucleotide() + nuc.count_nucleotides('-', 99) + nuc.count_nucleotides('T', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix_one_pct = nuc.get_consensus(0.01) + consensus_mix_ten_pct = nuc.get_consensus(0.10) + + expected_consensus_max = '-' # most common + expected_consensus_mix_one_pct = 't' # mix of both + expected_consensus_mix_ten_pct = '-' # only deletions + assert expected_consensus_max == consensus_max + assert expected_consensus_mix_one_pct == consensus_mix_one_pct + assert expected_consensus_mix_ten_pct == consensus_mix_ten_pct + + +def test_consensus_mixed_with_gap_and_poor_quality(): + nuc = SeedNucleotide() + nuc.count_nucleotides('N', 3) + nuc.count_nucleotides('-', 2) + nuc.count_nucleotides('T', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = '-' + expected_consensus_mix = 't' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_poor_quality_only(): + nuc = SeedNucleotide() + nuc.count_nucleotides('N', 1) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = 'N' + expected_consensus_mix = 'N' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_mixed_gap_and_poor_quality_only(): + nuc = SeedNucleotide() + nuc.count_nucleotides('N', 3) + nuc.count_nucleotides('-', 2) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus_max = '-' + expected_consensus_mix = '-' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_all_below_cutoff(): + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 101) + nuc.count_nucleotides('T', 100) + nuc.count_nucleotides('G', 99) + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.5) + + expected_consensus_max = 'C' + expected_consensus_mix = 'N' + assert expected_consensus_max == consensus_max + assert expected_consensus_mix == consensus_mix + + +def test_consensus_between_reads(): + """Lower-case n represents the gap between forward and reverse reads. + + Should not be counted in consensus totals""" + nuc = SeedNucleotide() + nuc.count_nucleotides('C', 9) + nuc.count_nucleotides('T', 1) + nuc.count_nucleotides('n', 2) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus = 'Y' + assert expected_consensus == consensus_mix + + +def test_consensus_missing_positions(): + """ Positions that are never read are ignored in the consensus. """ + + # No counts added + nuc = SeedNucleotide() + + consensus_max = nuc.get_consensus(MAX_CUTOFF) + consensus_mix = nuc.get_consensus(0.1) + + expected_consensus = '' + assert expected_consensus == consensus_max + assert expected_consensus == consensus_mix + + +def test_overlap(): + nuc = SeedNucleotide() + nuc.count_nucleotides('T', 4) + other = SeedNucleotide() + other.count_nucleotides('C', 5) + expected_counts = {'T': 4} + expected_v3_overlap = 5 + + nuc.count_overlap(other) + + assert expected_counts == nuc.counts + assert expected_v3_overlap == nuc.v3_overlap