diff --git a/micall/hivdb/asi_algorithm.py b/micall/hivdb/asi_algorithm.py index 93b3b97e7..141093656 100644 --- a/micall/hivdb/asi_algorithm.py +++ b/micall/hivdb/asi_algorithm.py @@ -305,7 +305,7 @@ def interpret(self, aaseq, region): default_level = ResistanceLevels.FAIL.level default_level_name = self.level_def[str(default_level)] - mutations = VariantCalls(reference=(self.stds[region]), sample=aaseq) + mutations = VariantCalls(reference=self.stds[region], sample=aaseq) for drug_class in drug_classes: for drug_code in self.drug_class[drug_class]: diff --git a/micall/hivdb/hivdb.py b/micall/hivdb/hivdb.py index eb42ee281..98eea2b25 100644 --- a/micall/hivdb/hivdb.py +++ b/micall/hivdb/hivdb.py @@ -91,6 +91,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0): max_pos = None # TODO: Remove last_coverage check after validating against Ruby version. last_coverage = 0 + last_covered_pos = None for row in rows: counts = list(map(int, (row[f] for f in coverage_columns))) coverage = int(row['coverage']) @@ -102,6 +103,7 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0): if coverage == 0 or coverage < min_coverage: pos_aminos = {} else: + last_covered_pos = pos min_count = max(1, coverage * min_fraction) # needs at least 1 pos_aminos = {report_names[i]: count/coverage for i, count in enumerate(counts) @@ -110,6 +112,11 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None, min_coverage=0): if ins_count >= min_count: pos_aminos['i'] = ins_count / coverage aminos.append(pos_aminos) + if (region.endswith('-NS5b') and + last_covered_pos is not None and + last_covered_pos < 400): + # Override last_coverage check when MIDI is missing. + last_coverage = min_coverage if max_pos is None or min(last_coverage, total_coverage // max_pos) >= min_coverage: # Need original region to look up wild type. @@ -186,7 +193,7 @@ def write_resistance(aminos, resistance_csv, mutations_csv, algorithms=None): asi = algorithms.get(genotype) if asi is None: continue - result = asi.interpret(amino_seq, region) + result = interpret(asi, amino_seq, region) region_results.append((region, amino_seq, result)) if all(drug_result.level == ResistanceLevels.FAIL.level for region, amino_seq, result in region_results @@ -217,6 +224,43 @@ def write_resistance(aminos, resistance_csv, mutations_csv, algorithms=None): genotype=genotype)) +def interpret(asi, amino_seq, region): + # TODO: Make this more general instead of only applying to missing MIDI. + result = asi.interpret(amino_seq, region) + if not region.endswith('-NS5b'): + return result + for drug_result in result.drugs: + if drug_result.level == ResistanceLevels.FAIL.level: + break + else: + return result + i = 0 + for i, aminos in reversed(list(enumerate(amino_seq))): + if aminos: + break + if i <= 400: + # Missing the MIDI coverage, see if we can report resistance. + ref_seq = asi.stds[region] + new_amino_seq = [] + for i, old_aminos in enumerate(amino_seq): + if old_aminos: + new_amino_seq.append(old_aminos) + else: + # Assume wild type. + new_amino_seq.append({ref_seq[i]: 1.0}) + new_result = asi.interpret(new_amino_seq, region) + result.mutations = new_result.mutations + for old_drug_result, new_drug_result in zip(result.drugs, new_result.drugs): + if new_drug_result.level == ResistanceLevels.RESISTANCE_LIKELY.level: + old_drug_result.level = new_drug_result.level + old_drug_result.level_name = new_drug_result.level_name + old_drug_result.score = new_drug_result.score + + + + return result + + def load_asi(): asi = AsiAlgorithm(HIV_RULES_PATH) algorithms = {None: asi} diff --git a/micall/tests/test_hivdb.py b/micall/tests/test_hivdb.py index 92b0e0eb1..ba13cadfb 100644 --- a/micall/tests/test_hivdb.py +++ b/micall/tests/test_hivdb.py @@ -273,6 +273,25 @@ def test_coverage_missing_at_end(self): self.assertEqual(expected_aminos, aminos) + def test_resistant_even_with_missing_midi(self): + amino_csv = StringIO("""\ +seed,region,q-cutoff,query.nuc.pos,refseq.aa.pos,\ +A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,*,X,partial,del,ins,clip,g2p_overlap,coverage +HCV-1a,HCV1A-H77-NS5b,15,1,395,0,0,0,0,0,5000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5000 +HCV-1a,HCV1A-H77-NS5b,15,4,396,0,0,0,0,5000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5000 +""") + min_fraction = 0.2 + min_coverage = 9 + expected_aminos = [AminoList('HCV1A-H77-NS5b', + [{'G': 1.0}, {'F': 1.0}], + '1A')] + + aminos = list(read_aminos(amino_csv, + min_fraction, + min_coverage=min_coverage)) + + self.assertEqual(expected_aminos, aminos) + def test_missing_region(self): amino_csv = StringIO("""\ seed,region,q-cutoff,query.nuc.pos,refseq.aa.pos,\ @@ -564,6 +583,28 @@ def test_hcv(self): self.assertEqual(expected_resistance, resistance_csv.getvalue()) self.assertEqual(expected_mutations, mutations_csv.getvalue()) + def test_hcv_missing_midi(self): + aminos = [AminoList('HCV1A-H77-NS5b', + [{}]*394 + [{'G': 1.0}] + [{}]*196, + '1A')] + resistance_csv = StringIO() + mutations_csv = StringIO() + expected_resistance = """\ +region,drug_class,drug,drug_name,level,level_name,score,genotype +NS5b,NS5b,DSV,DSV,5,Resistance Likely,8.0,1A +NS5b,NS5b,SOF-EPC,SOF-EPC,0,Sequence does not meet quality-control standards,0.0,1A +NS5b,NS5b,SOF-HAR,SOF-HAR,0,Sequence does not meet quality-control standards,0.0,1A +""" + expected_mutations = """\ +drug_class,mutation,prevalence,genotype +NS5b,A395G,1.0,1A +""" + + write_resistance(aminos, resistance_csv, mutations_csv, self.algorithms) + + self.assertEqual(expected_resistance, resistance_csv.getvalue()) + self.assertEqual(expected_mutations, mutations_csv.getvalue()) + def test_hcv_low_coverage(self): aminos = [] resistance_csv = StringIO()