Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/hotfix'
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Dec 9, 2019
2 parents 5ea5160 + f892f15 commit 1c73528
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
# The short X.Y version.
version = '1.5'
# The full version, including alpha/beta/rc tags.
release = '1.5.3'
release = '1.5.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
26 changes: 11 additions & 15 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,17 @@ New in release |release|

This is a hotfix release. It fixes the following issues:

- pVACbind would previously throw an error if a peptide sequence in the input
fasta was shorter than one of the chosen epitope lengths. This issue has
been fixed by first parsing the input fasta and creating individual fasta
files for each epitope length that enforce a minimum length of the peptide
sequences matching the respective epitope length.
- Previous versions of pVACtools resolved an issue where IEDB would output a
warning line if one of the epitope sequences only contained A, C, G, or T
amino acids, since those sequences could also be nucleotide sequences.
However, this issue was only fixed in pVACseq, not pVACbind, or pVACvector.
This release fixes this issue for all tools.
- The wrappers for NetChop or NetMHCstabpan split the set of input epitopes
into chunks of 100 before processing. Due to a bug in the file splitting
logic, one epitope for each chunk over 100 would be erroneously dropped. This
effectively would result in fewer epitopes being returned in the filtered
report than if running the pipelines without NetChop or NetMHCstabpan.
- The ``pvacseq generate_protein_fasta`` command would previously error out
when running with a selected ``peptide_sequence_length`` that would result
in peptides < 7 amino acids long. This error would occur when calculating
manufacturability metrics. This release now only calculates these metrics
for peptides >=7 amino acids long.
- We updated the calculation for the flanking sequence length when generating
peptide sequences to result in peptides that are closer in length to the
requested ``peptide_sequence_length``.
- This release fixes an edge case where a frameshift mutation impacted the
first amino acid of a transcript. This case would previously throw a fatal
error but will now be processed as expected.

New in version |version|
------------------------
Expand Down
21 changes: 12 additions & 9 deletions lib/calculate_manufacturability.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,30 @@ def append_manufacturability_metrics(self, line, peptide):
def execute(self):
if self.file_type == 'fasta':
with open(self.output_file, 'w') as output_fh:
writer = csv.DictWriter(output_fh, delimiter = "\t", fieldnames=['id', 'peptide_sequence'] + self.manufacturability_headers(), extrasaction='ignore')
writer = csv.DictWriter(output_fh, delimiter = "\t", fieldnames=['id', 'peptide_sequence'] + self.manufacturability_headers(), extrasaction='ignore', restval='NA')
writer.writeheader()
for record in SeqIO.parse(self.input_file, "fasta"):
seq_num = record.id
peptide = str(record.seq)
sequence = str(record.seq)
line = {
'id': seq_num,
'peptide_sequence': peptide
'peptide_sequence': sequence
}
peptide = PvacpeptideVaccinePeptide(peptide)
line = self.append_manufacturability_metrics(line, peptide)
if len(sequence) >= 7:
peptide = PvacpeptideVaccinePeptide(sequence)
line = self.append_manufacturability_metrics(line, peptide)
writer.writerow(line)
else:
with open(self.input_file) as input_fh, open(self.output_file, 'w') as output_fh:
reader = csv.DictReader(input_fh, delimiter = "\t")
writer = csv.DictWriter(output_fh, delimiter = "\t", fieldnames=reader.fieldnames + self.manufacturability_headers(), extrasaction='ignore')
writer = csv.DictWriter(output_fh, delimiter = "\t", fieldnames=reader.fieldnames + self.manufacturability_headers(), extrasaction='ignore', restval='NA')
writer.writeheader()
for line in reader:
if self.file_type == 'pVACbind':
peptide = PvacpeptideVaccinePeptide(line['Epitope Seq'])
sequence = line['Epitope Seq']
else:
peptide = PvacpeptideVaccinePeptide(line['MT Epitope Seq'])
line = self.append_manufacturability_metrics(line, peptide)
sequence = line['MT Epitope Seq']
if len(sequence) >= 7:
peptide = PvacpeptideVaccinePeptide(sequence)
line = self.append_manufacturability_metrics(line, peptide)
writer.writerow(line)
2 changes: 1 addition & 1 deletion lib/fasta_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def determine_peptide_sequence_length(self, full_wildtype_sequence_length, pepti
def determine_flanking_sequence_length(self, full_wildtype_sequence_length, peptide_sequence_length, line):
actual_peptide_sequence_length = self.determine_peptide_sequence_length(full_wildtype_sequence_length, peptide_sequence_length, line)
if actual_peptide_sequence_length%2 == 0:
return int((actual_peptide_sequence_length-2) / 2)
return int(actual_peptide_sequence_length / 2)
else:
return int((actual_peptide_sequence_length-1) / 2)

Expand Down
2 changes: 1 addition & 1 deletion lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def match_wildtype_and_mutant_entry_for_frameshift(self, result, mt_position, wt
result['wt_epitope_seq'] = 'NA'
result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA')
mutation_position = self.find_mutation_position(wt_epitope_seq, mt_epitope_seq)
if mutation_position == 1 and int(previous_result['mutation_position']) <= 1:
if mutation_position == 1 and previous_result is not None and int(previous_result['mutation_position']) <= 1:
#The true mutation position is to the left of the current MT epitope
mutation_position = 0
result['mutation_position'] = mutation_position
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

setup(
name="pvactools",
version="1.5.3",
version="1.5.4",
packages=[
"tools",
"tools.pvacbind",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
peptide ic50 seq_num start allele
PRPKRLST 32090.567698455194 2 2 HLA-A*02:01
RPRPKRLS 32488.54503574128 2 1 HLA-A*02:01
AAAPEAPV 10669.99440612248 1 1 HLA-A*02:01
RPKRLSTR 31020.901282749608 2 3 HLA-A*02:01
APEAPVYA 17866.570741869527 1 3 HLA-A*02:01
PKRLSTRT 31935.534444637513 2 4 HLA-A*02:01
AAPEAPVY 30318.795326400505 1 2 HLA-A*02:01
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1:
- WT.1.JUND.ENST00000600972.FS.1GC/G
2:
- MT.1.JUND.ENST00000600972.FS.1GC/G
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chromosome_name start stop reference variant gene_name transcript_name transcript_support_level amino_acid_change codon_change ensembl_gene_id hgvsc hgvsp wildtype_amino_acid_sequence downstream_amino_acid_sequence fusion_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index protein_length_change
19 18280928 18280929 GC G JUND ENST00000600972 2 A/X Gcg/cg ENSG00000130522 ENST00000600972.1:c.1del ENSP00000475153.2:p.Ala1ArgfsTer? AAAPEAPVYANLSSYAGGAGGAGGAATVAFAAEPVPFPPPPPPGALGPPRLAALKDEPQTVPDVPSFGESPPLSPIDMDTQERIKAERKRLRNRIAASKCRKRKLERISRLEEKVKTLKSQNTELASTASLLREQVAQLKQKVLSHVNSGCQLLPQHQREEQSVRF RPRPKRLSTRT FS 1 NA NA NA NA 4 1.0 NA NA 1.JUND.ENST00000600972.FS.1GC/G -154
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Chromosome Start Stop Reference Variant Transcript Transcript Support Level Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HGVSc HGVSp HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT Score Method Best MT Score Corresponding WT Score Corresponding Fold Change Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median MT Score Median WT Score Median Fold Change MHCnuggetsI WT Score MHCnuggetsI MT Score
19 18280928 18280929 GC G ENST00000600972 2 ENSG00000130522 FS A/X 1 JUND ENST00000600972.1:c.1del ENSP00000475153.2:p.Ala1ArgfsTer? HLA-A*02:01 8 4 0 PKRLSTRT NA MHCnuggetsI 31935.534 NA NA 4 1.0 NA NA NA NA NA NA 31935.534 NA NA NA 31935.534444637513
19 18280928 18280929 GC G ENST00000600972 2 ENSG00000130522 FS A/X 1 JUND ENST00000600972.1:c.1del ENSP00000475153.2:p.Ala1ArgfsTer? HLA-A*02:01 8 1 1 RPRPKRLS NA MHCnuggetsI 32488.545 NA NA 4 1.0 NA NA NA NA NA NA 32488.545 NA NA NA 32488.54503574128
19 18280928 18280929 GC G ENST00000600972 2 ENSG00000130522 FS A/X 1 JUND ENST00000600972.1:c.1del ENSP00000475153.2:p.Ala1ArgfsTer? HLA-A*02:01 8 3 0 RPKRLSTR NA MHCnuggetsI 31020.901 NA NA 4 1.0 NA NA NA NA NA NA 31020.901 NA NA NA 31020.901282749608
19 18280928 18280929 GC G ENST00000600972 2 ENSG00000130522 FS A/X 1 JUND ENST00000600972.1:c.1del ENSP00000475153.2:p.Ala1ArgfsTer? HLA-A*02:01 8 2 0 PRPKRLST NA MHCnuggetsI 32090.568 NA NA 4 1.0 NA NA NA NA NA NA 32090.568 NA NA NA 32090.567698455194
19 changes: 19 additions & 0 deletions tests/test_output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,25 @@ def test_input_frameshift_variant_feature_truncation2_gets_parsed_correctly(self
expected_output_file = os.path.join(self.test_data_dir, "output_frameshift_variant_feature_truncation2.iedb.parsed.tsv")
self.assertTrue(compare(parse_output_output_file.name, expected_output_file))

def test_input_frameshift_variant_position_1_gets_parsed_correctly(self):
parse_output_input_iedb_file = [os.path.join(self.test_data_dir, "input_frameshift_variant_position_1.MHCnuggetsI.HLA-A*02:01.8.tsv")]
parse_output_input_tsv_file = os.path.join(self.test_data_dir, "input_frameshift_variant_position_1.tsv")
parse_output_key_file = os.path.join(self.test_data_dir, "input_frameshift_variant_position_1.key")
parse_output_output_file = tempfile.NamedTemporaryFile()

parse_output_params = {
'input_iedb_files' : parse_output_input_iedb_file,
'input_tsv_file' : parse_output_input_tsv_file,
'key_file' : parse_output_key_file,
'output_file' : parse_output_output_file.name,
'sample_name' : None,
}
parser = DefaultOutputParser(**parse_output_params)

self.assertFalse(parser.execute())
expected_output_file = os.path.join(self.test_data_dir, "output_frameshift_variant_position_1.iedb.parsed.tsv")
self.assertTrue(compare(parse_output_output_file.name, expected_output_file))

def test_input_inframe_deletion_aa_deletion_gets_parsed_correctly(self):
parse_output_input_iedb_file = [os.path.join(self.test_data_dir, "input_inframe_deletion_aa_deletion.ann.HLA-A*29:02.9.tsv")]
parse_output_input_tsv_file = os.path.join(self.test_data_dir, "input_inframe_deletion_aa_deletion.tsv")
Expand Down

0 comments on commit 1c73528

Please sign in to comment.