Skip to content

Commit

Permalink
Show known resistance when missing MIDI for #412.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby committed Jan 27, 2018
1 parent 4c04b73 commit 4b68999
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 2 deletions.
2 changes: 1 addition & 1 deletion micall/hivdb/asi_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def interpret(self, aaseq, region):
default_level = ResistanceLevels.FAIL.level
default_level_name = self.level_def[str(default_level)]

mutations = VariantCalls(reference=(self.stds[region]), sample=aaseq)
mutations = VariantCalls(reference=self.stds[region], sample=aaseq)

for drug_class in drug_classes:
for drug_code in self.drug_class[drug_class]:
Expand Down
46 changes: 45 additions & 1 deletion micall/hivdb/hivdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0):
max_pos = None
# TODO: Remove last_coverage check after validating against Ruby version.
last_coverage = 0
last_covered_pos = None
for row in rows:
counts = list(map(int, (row[f] for f in coverage_columns)))
coverage = int(row['coverage'])
Expand All @@ -102,6 +103,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0):
if coverage == 0 or coverage < min_coverage:
pos_aminos = {}
else:
last_covered_pos = pos
min_count = max(1, coverage * min_fraction) # needs at least 1
pos_aminos = {report_names[i]: count/coverage
for i, count in enumerate(counts)
Expand All @@ -110,6 +112,11 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0):
if ins_count >= min_count:
pos_aminos['i'] = ins_count / coverage
aminos.append(pos_aminos)
if (region.endswith('-NS5b') and
last_covered_pos is not None and
last_covered_pos < 400):
# Override last_coverage check when MIDI is missing.
last_coverage = min_coverage
if max_pos is None or min(last_coverage,
total_coverage // max_pos) >= min_coverage:
# Need original region to look up wild type.
Expand Down Expand Up @@ -186,7 +193,7 @@ def write_resistance(aminos, resistance_csv, mutations_csv, algorithms=None):
asi = algorithms.get(genotype)
if asi is None:
continue
result = asi.interpret(amino_seq, region)
result = interpret(asi, amino_seq, region)
region_results.append((region, amino_seq, result))
if all(drug_result.level == ResistanceLevels.FAIL.level
for region, amino_seq, result in region_results
Expand Down Expand Up @@ -217,6 +224,43 @@ def write_resistance(aminos, resistance_csv, mutations_csv, algorithms=None):
genotype=genotype))


def interpret(asi, amino_seq, region):
# TODO: Make this more general instead of only applying to missing MIDI.
result = asi.interpret(amino_seq, region)
if not region.endswith('-NS5b'):
return result
for drug_result in result.drugs:
if drug_result.level == ResistanceLevels.FAIL.level:
break
else:
return result
i = 0
for i, aminos in reversed(list(enumerate(amino_seq))):
if aminos:
break
if i <= 400:
# Missing the MIDI coverage, see if we can report resistance.
ref_seq = asi.stds[region]
new_amino_seq = []
for i, old_aminos in enumerate(amino_seq):
if old_aminos:
new_amino_seq.append(old_aminos)
else:
# Assume wild type.
new_amino_seq.append({ref_seq[i]: 1.0})
new_result = asi.interpret(new_amino_seq, region)
result.mutations = new_result.mutations
for old_drug_result, new_drug_result in zip(result.drugs, new_result.drugs):
if new_drug_result.level == ResistanceLevels.RESISTANCE_LIKELY.level:
old_drug_result.level = new_drug_result.level
old_drug_result.level_name = new_drug_result.level_name
old_drug_result.score = new_drug_result.score



return result


def load_asi():
asi = AsiAlgorithm(HIV_RULES_PATH)
algorithms = {None: asi}
Expand Down
41 changes: 41 additions & 0 deletions micall/tests/test_hivdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,25 @@ def test_coverage_missing_at_end(self):

self.assertEqual(expected_aminos, aminos)

def test_resistant_even_with_missing_midi(self):
amino_csv = StringIO("""\
seed,region,q-cutoff,query.nuc.pos,refseq.aa.pos,\
A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*,X,partial,del,ins,clip,g2p_overlap,coverage
HCV-1a,HCV1A-H77-NS5b,15,1,395,0,0,0,0,0,5000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5000
HCV-1a,HCV1A-H77-NS5b,15,4,396,0,0,0,0,5000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5000
""")
min_fraction = 0.2
min_coverage = 9
expected_aminos = [AminoList('HCV1A-H77-NS5b',
[{'G': 1.0}, {'F': 1.0}],
'1A')]

aminos = list(read_aminos(amino_csv,
min_fraction,
min_coverage=min_coverage))

self.assertEqual(expected_aminos, aminos)

def test_missing_region(self):
amino_csv = StringIO("""\
seed,region,q-cutoff,query.nuc.pos,refseq.aa.pos,\
Expand Down Expand Up @@ -564,6 +583,28 @@ def test_hcv(self):
self.assertEqual(expected_resistance, resistance_csv.getvalue())
self.assertEqual(expected_mutations, mutations_csv.getvalue())

def test_hcv_missing_midi(self):
aminos = [AminoList('HCV1A-H77-NS5b',
[{}]*394 + [{'G': 1.0}] + [{}]*196,
'1A')]
resistance_csv = StringIO()
mutations_csv = StringIO()
expected_resistance = """\
region,drug_class,drug,drug_name,level,level_name,score,genotype
NS5b,NS5b,DSV,DSV,5,Resistance Likely,8.0,1A
NS5b,NS5b,SOF-EPC,SOF-EPC,0,Sequence does not meet quality-control standards,0.0,1A
NS5b,NS5b,SOF-HAR,SOF-HAR,0,Sequence does not meet quality-control standards,0.0,1A
"""
expected_mutations = """\
drug_class,mutation,prevalence,genotype
NS5b,A395G,1.0,1A
"""

write_resistance(aminos, resistance_csv, mutations_csv, self.algorithms)

self.assertEqual(expected_resistance, resistance_csv.getvalue())
self.assertEqual(expected_mutations, mutations_csv.getvalue())

def test_hcv_low_coverage(self):
aminos = []
resistance_csv = StringIO()
Expand Down

0 comments on commit 4b68999

Please sign in to comment.