Skip to content

Commit

Permalink
Display genotype on HCV reports for #412.
Browse files Browse the repository at this point in the history
Also display Canadian drug names and updated HCV disclaimer.
  • Loading branch information
donkirkby committed Jan 6, 2018
1 parent 062c5bc commit f152e26
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 165 deletions.
6 changes: 4 additions & 2 deletions micall/hivdb/genreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,11 @@ def get_reported_drug_classes(self):

def read_mutations(drug_classes, csv_file):
"""Read in a mutations file from CSV.
Returns a list of dictionaries.
"""
err_string = "Error in mutations file '{}'".format(csv_file.name)
exp_set = frozenset("drug_class,mutation,prevalence".split(","))
exp_set = frozenset("drug_class,mutation,prevalence,genotype".split(","))
data_lst = list(csv.DictReader(csv_file, restkey="dummy"))
# make sure that all lines have exactly the required fields
if sum([set(od.keys()) == exp_set for od in data_lst]) != len(data_lst):
Expand Down Expand Up @@ -206,7 +207,7 @@ def read_resistance(regions, csv_file):
"""
err_string = "Error in resistance file '{}'".format(csv_file.name)
exp_set = frozenset(
"region,drug_class,drug,drug_name,level,level_name,score".split(","))
"region,drug_class,drug,drug_name,level,level_name,score,genotype".split(","))
data_lst = list(csv.DictReader(csv_file, restkey="dummy"))
# make sure that all lines have exactly the required fields
if sum([set(od.keys()) == exp_set for od in data_lst]) != len(data_lst):
Expand All @@ -215,6 +216,7 @@ def read_resistance(regions, csv_file):
report_page = regions[od['region']]
level = int(od['level'])
drug_id = od['drug']
report_page.genotype = od['genotype']
report_page.resistance_calls[drug_id] = (level, od["level_name"])


Expand Down
86 changes: 49 additions & 37 deletions micall/hivdb/genreport.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,31 @@
# NOTE: The order of the drugs in each drug_class will determine the order
# in the report tables.
INSTI:
- [DTG, dolutegravir]
- [EVG, elvitegravir]
- [RAL, raltegravir]
- [DTG, Dolutegravir]
- [EVG, Elvitegravir]
- [RAL, Raltegravir]
PI:
- [ATV/r, atazanavir/r]
- [DRV/r, darunavir/r]
- [FPV/r, fosamprenavir/r]
- [IDV/r, indinavir/r]
- [LPV/r, lopinavir/r]
- [NFV, nelfinavir]
- [SQV/r, saquinavir/r]
- [TPV/r, tipranavir/r]
- [ATV/r, Atazanavir/r]
- [DRV/r, Darunavir/r]
- [FPV/r, Fosamprenavir/r]
- [IDV/r, Indinavir/r]
- [LPV/r, Lopinavir/r]
- [NFV, Nelfinavir]
- [SQV/r, Saquinavir/r]
- [TPV/r, Tipranavir/r]
NRTI:
- [3TC, lamivudine]
- [ABC, abacavir]
- [AZT, zidovudine]
- [D4T, stavudine]
- [DDI, didanosine]
- [FTC, emtricitabine]
- [TDF, tenofovir]
- [3TC, Lamivudine]
- [ABC, Abacavir]
- [AZT, Zidovudine]
- [D4T, Stavudine]
- [DDI, Didanosine]
- [FTC, Emtricitabine]
- [TDF, Tenofovir]
NNRTI:
- [EFV, efavirenz]
- [ETR, etravirine]
- [NVP, nevirapine]
- [RPV, rilpivirine]
- [EFV, Efavirenz]
- [ETR, Etravirine]
- [NVP, Nevirapine]
- [RPV, Rilpivirine]
known_drug_classes:
#the order in this list determines the order of the drug_class tables.
- [NRTI, NRTI/NtRTI]
Expand Down Expand Up @@ -75,21 +75,22 @@
# NOTE: The order of the drugs in each drug_class will determine the order
# in the report tables.
NS3:
- [BPV, Boceprevir]
- [GZR, Grazoprevir]
- [PTV, Paritaprevir]
- [SPV, Simeprevir]
- [TPV, Telaprevir]
- [BPV, Boceprevir (Victrelis™)]
- [GZR, Grazoprevir (a component of Zepatier™)]
- [PTV, Paritaprevir (a component of Technivie™ and Holkira Pak™)]
- [SPV, Simeprevir (Galexos™)]
- [TPV, Telaprevir (Incivek™)]
NS5a:
- [DCV, Daclatasvir]
- [EBV, Elbasvir]
- [LDV, Ledipasvir]
- [OBV, Ombitasvir]
- [VEL, Velpatasvir]
- [DCV, Daclatasvir (Daklinza™)]
- [EBV, Elbasvir (a component of Zepatier™)]
- [LDV, Ledipasvir (a component of Harvoni™)]
- [OBV, Ombitasvir (a component of Technivie™ and Holkira Pak™)]
- [VEL, Velpatasvir (a component of Epclusa™)]
NS5b:
- [DSV, Dasabuvir]
- [SOF-EPC, "Sofosbuvir (Epclusa)"]
- [SOF-HAR, "Sofosbuvir (Harvoni)"]
- [DSV, Dasabuvir (a component of Holkira Pak™)]
- [SOF-EPC, Sofosbuvir (a component of Epclusa™)]
- [SOF-HAR, Sofosbuvir (a component of Harvoni™)]

known_drug_classes:
#the order in this list determines the order of the drug_class tables.
- [NS3, HCV NS3]
Expand All @@ -104,7 +105,18 @@
3: ['Resistance Likely', 0xDD0000, 0xFFFFFF]

disclaimer_text: >
TODO: HCV disclaimer text
Mutations in NS3, NS5A and NS5B were detected by deep sequencing of HCV.
The mutations considered and the interpretation algorithm can be found at
http://cfe-lab.github.io/MiCall . Mutations relative to genotype-specific
reference sequences detected above a prevalence of 5% of the total coverage
are reported here. This resistance scoring algorithm is still in
development and should be regarded as investigational; it is currently
defined as "Research Use Only". Patient management should not be based
solely on drug susceptibility results provided in this report. The clinical
response to the treatment regimen depends on many factors including patient
disease status, viral load, number and types of direct acting antivirals
and treatment duration.
generated_by_text: >
Generated by MiCall {} on Illumina BaseSpace using ???, modified on ???.
Generated by MiCall {} on Illumina BaseSpace using cfe-hcv 1.5, modified on
7 Dec 2016.
6 changes: 0 additions & 6 deletions micall/hivdb/hcv_rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,6 @@
"reference": "HCV1B-Con1-NS5b",
"region": "NS5b",
"genotype": "1B"
},
{
"rules": "SCORE FROM ( 142T => 4, 159F => 4, 237G => 4, 282T => 8, 314IFP => 4, 321A => 4, 355H => 4 )",
"reference": "HCV3-S52-NS5b",
"region": "NS5b",
"genotype": "3"
}
],
"name": "SOF-HAR",
Expand Down
57 changes: 47 additions & 10 deletions micall/hivdb/hivdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
from argparse import ArgumentParser, FileType
from collections import namedtuple
from csv import DictReader, DictWriter
from itertools import groupby
from operator import itemgetter
Expand All @@ -15,6 +16,8 @@
HIV_RULES_PATH = os.path.join(os.path.dirname(__file__), 'HIVDB_8.3.xml')
HCV_RULES_PATH = os.path.join(os.path.dirname(__file__), 'hcv_rules.json')

AminoList = namedtuple('AminoList', 'region aminos seed')


def parse_args():
parser = ArgumentParser(
Expand Down Expand Up @@ -46,6 +49,17 @@ def get_reported_region(reference):
return reference


def get_genotype(seed):
    """Extract the single-character HCV genotype from a seed name.

    The seed name is split on '-'; the first part must be exactly 'HCV' and
    the first character of the second part is reported as the genotype
    (for example, 'HCV-1a' gives '1').

    :param seed: seed reference name, or None when no seed is known
    :return: the first character of the genotype part, or None when seed is
        None, is not an HCV seed, or carries no genotype part
    """
    if seed is None:
        return None
    virus, _, remainder = seed.partition('-')
    if virus != 'HCV':
        return None
    # Only the part between the first and second dash names the genotype.
    full_genotype = remainder.split('-')[0]
    # A bare 'HCV' or 'HCV-' seed has no genotype part; report that as None
    # instead of raising IndexError.
    if not full_genotype:
        return None
    return full_genotype[0]


def find_good_regions(original_regions, coverage_scores_csv):
good_regions = {}
for row in DictReader(coverage_scores_csv):
Expand All @@ -69,16 +83,16 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None):
missing_regions = set()
if reported_regions:
missing_regions.update(reported_regions.keys())
for region, rows in groupby(DictReader(amino_csv),
itemgetter('region')):
for (region, seed), rows in groupby(DictReader(amino_csv),
itemgetter('region', 'seed')):
if reported_regions is not None:
missing_regions.discard(region)
translated_region, is_reported = reported_regions.get(region,
(None, None))
if translated_region is None:
continue
if not is_reported:
yield region, None
yield AminoList(region, None, None)
continue
aminos = []
for row in rows:
Expand All @@ -92,9 +106,9 @@ def read_aminos(amino_csv, min_fraction, reported_regions=None):
if ins_count >= min_count:
pos_aminos['i'] = ins_count / coverage
aminos.append(pos_aminos)
yield region, aminos
yield AminoList(region, aminos, seed)
for region in missing_regions:
yield region, None
yield AminoList(region, None, None)


def write_insufficient_data(resistance_writer, region, asi):
Expand All @@ -113,21 +127,42 @@ def write_insufficient_data(resistance_writer, region, asi):


def write_resistance(aminos, resistance_csv, mutations_csv):
""" Calculate resistance scores and write them to files.
:param list[AminoList] aminos: region is the coordinate
reference name that this gene region was mapped to, and prevalence is a
float between 0.0 and 1.0
:param resistance_csv: open file to write resistance calls to, grouped by
genotype, region, drug_class
:param mutations_csv: open file to write mutations to, grouped by genotype,
drug_class
"""
resistance_writer = DictWriter(
resistance_csv,
['region', 'drug_class', 'drug', 'drug_name', 'level', 'level_name', 'score'],
['region',
'drug_class',
'drug',
'drug_name',
'level',
'level_name',
'score',
'genotype'],
lineterminator=os.linesep)
resistance_writer.writeheader()
mutations_writer = DictWriter(mutations_csv,
['drug_class', 'mutation', 'prevalence'],
['drug_class',
'mutation',
'prevalence',
'genotype'],
lineterminator=os.linesep)
mutations_writer.writeheader()
algorithms = load_asi()
for region, amino_seq in aminos:
for region, amino_seq, seed in aminos:
asi = algorithms.get(region)
if asi is None:
continue
reported_region = get_reported_region(region)
genotype = get_genotype(seed)
if amino_seq is None:
write_insufficient_data(resistance_writer, region, asi)
continue
Expand All @@ -139,7 +174,8 @@ def write_resistance(aminos, resistance_csv, mutations_csv):
drug_name=drug_result.name,
level_name=drug_result.level_name,
level=drug_result.level,
score=drug_result.score))
score=drug_result.score,
genotype=genotype))
for drug_class, class_mutations in result.mutations.items():
for mutation in class_mutations:
amino = mutation[-1]
Expand All @@ -148,7 +184,8 @@ def write_resistance(aminos, resistance_csv, mutations_csv):
prevalence = pos_aminos[amino]
mutations_writer.writerow(dict(drug_class=drug_class,
mutation=mutation,
prevalence=prevalence))
prevalence=prevalence,
genotype=genotype))


def load_asi():
Expand Down
23 changes: 8 additions & 15 deletions micall/hivdb/pdfreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def drug_class_tablst(row_offset, report_page, dc_name, level_coltab):
level, level_name = resistance_dct[drug_id]
else:
level, level_name = 1, "NOT REPORTED"
t_data.append([drug_name.capitalize(), level_name])
t_data.append([drug_name, level_name])
# determine colours for the level
bg_col, fg_col = level_coltab[level]
t_style.extend([('TEXTCOLOR', (1, tabline + drow_min), (1, tabline + drow_min), fg_col),
Expand All @@ -114,17 +114,7 @@ def drug_class_tablst(row_offset, report_page, dc_name, level_coltab):
return t_data, t_style


def drug_class_table(cfg_dct, dc_name, level_coltab, tabwidth):
    """Build the resistance table for a single drug class.

    cfg_dct, dc_name and level_coltab are handed straight to
    drug_class_tablst (with a row offset of 0), which supplies the table
    rows and the style commands.
    tabwidth: the total width allocated for the table.
    Returns the plat.Table built from those rows and styles.
    """
    rows, style_cmds = drug_class_tablst(0, cfg_dct, dc_name, level_coltab)
    # NOTE: this fudge factor keeps the left (drug name) column from being
    # too wide; the right column width is left as None.
    name_col_width = tabwidth * 0.36
    return plat.Table(rows,
                      vAlign="TOP",
                      style=style_cmds,
                      colWidths=[name_col_width, None])


def top_table(sample_name, table_width):
def top_table(sample_name, table_width, genotype):
"""Generate a (mostly empty) top table of three main columns.
table_width: the overall width of the table.
"""
Expand All @@ -135,7 +125,10 @@ def top_table(sample_name, table_width):
test_dl = [["Patient/Sample Details", "Test Details", "Physician Details"],
["", test_details_para("Sample ID: {}".format(samp_name)), ""],
["", test_details_para("Report Date: {}".format(nowstr)), ""],
["", "", ""],
["",
(genotype or "") and
test_details_para("Genotype: " + genotype),
""],
["", "", ""]
]
rn_min, rn_max = 1, len(test_dl) - 1
Expand Down Expand Up @@ -180,7 +173,7 @@ def write_report_one_column(report_pages, fname, sample_name=None):
doc_els.append(plat.Paragraph(cfg_dct["report_title"], ti_style))
doc_els.append(plat.Paragraph("For research use only", re_style))
# -- top table
doc_els.append(top_table(sample_name, table_width))
doc_els.append(top_table(sample_name, table_width, report_page.genotype))
# now drug classes tables, two per line
known_dc_lst = cfg_dct["known_dclass_list"]
tot_tab, tot_style = [], []
Expand All @@ -193,7 +186,7 @@ def write_report_one_column(report_pages, fname, sample_name=None):
tot_style.extend([("VALIGN", (0, 0), (1, num_rows-1), "TOP"),
("FONTSIZE", (0, 0), (1, num_rows-1), TAB_FONT_SIZE),
("LEADING", (0, 0), (1, num_rows-1), TAB_FONT_SIZE)])
left_col_w = table_width * 0.36
left_col_w = table_width * 0.5
right_col_w = table_width - left_col_w
doc_els.append(plat.Table(tot_tab,
vAlign="TOP",
Expand Down
2 changes: 1 addition & 1 deletion micall/tests/test_genreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_repr(self):

self.assertEqual(expected_repr, r)

def test(self):
def test_get_reported_drug_classes(self):
page = ReportPage(dict(known_drug_classes=[('C1', 'Class 1'),
('C2', 'Class 2')],
known_drugs={'C1': [('D1', 'Drug 1')],
Expand Down
Loading

0 comments on commit f152e26

Please sign in to comment.