Skip to content

Commit

Permalink
Merge pull request #1079 from griffithlab/issue_1074
Browse files Browse the repository at this point in the history
Handle nan values in the Mutation Position column
  • Loading branch information
susannasiebert authored Mar 13, 2024
2 parents a3d3e5d + 5f64dc2 commit ae1f135
Show file tree
Hide file tree
Showing 6 changed files with 5,398 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
run: |
pip install polars==0.16.18
pip install pypandoc==1.7.2
pip install "tensorflow<2.16"
pip install git+https://github.com/griffithlab/bigmhc.git#egg=bigmhc
pip install git+https://github.com/griffithlab/deepimmuno.git#egg=deepimmuno
pip install -e .
Expand Down
10 changes: 5 additions & 5 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,6 @@ def read_input_file(self, used_columns, dtypes):
def get_sub_df(self, all_epitopes_df, key):
    """Return the rows of ``all_epitopes_df`` that belong to one variant.

    Args:
        all_epitopes_df: DataFrame holding at least the columns
            ``Chromosome``, ``Start``, ``Stop``, ``Reference``, ``Variant``,
            ``Transcript``, ``Gene Name``, ``Mutation`` and
            ``Protein Position``.
        key: Indexable whose first five items are the chromosome, start,
            stop, reference and variant identifying the variant.

    Returns:
        A ``(df, key_str)`` tuple. ``df`` is an independent copy of the
        matching rows with two extra columns: ``annotation`` (transcript,
        gene name, mutation and protein position joined by ``-``) and
        ``key`` (``key_str`` repeated on every row). ``key_str`` is the
        five key items joined by ``-``.
    """
    key_str = "{}-{}-{}-{}-{}".format(*key[:5])

    matches_variant = (
        (all_epitopes_df['Chromosome'] == key[0])
        & (all_epitopes_df['Start'] == key[1])
        & (all_epitopes_df['Stop'] == key[2])
        & (all_epitopes_df['Reference'] == key[3])
        & (all_epitopes_df['Variant'] == key[4])
    )
    # Copy so the columns added below never leak into the caller's frame.
    df = all_epitopes_df[matches_variant].copy()

    # Missing 'Variant Type' / 'Mutation Position' values are deliberately
    # left as NaN: no 'NA' category is injected here, so the columns do not
    # have to be categorical and consumers can test them with pd.isna().

    # '-'.join requires every annotation field to be a string.
    annotation_columns = ['Transcript', 'Gene Name', 'Mutation', 'Protein Position']
    df['annotation'] = df[annotation_columns].agg('-'.join, axis=1)
    df['key'] = key_str
    return (df, key_str)
Expand Down Expand Up @@ -367,14 +365,16 @@ def is_anchor_residue_pass(self, mutation):
anchors = self.get_anchor_positions(mutation['HLA Allele'], len(mutation['MT Epitope Seq']))
# parse out mutation position from str
position = mutation["Mutation Position"]
if '-' in position:
if pd.isna(position):
return anchor_residue_pass
elif '-' in position:
d_ind = position.index('-')
if all(pos in anchors for pos in range(int(position[0:d_ind]), int(position[d_ind+1:])+1)):
if pd.isna(mutation["{} WT IC50 Score".format(self.wt_top_score_metric)]):
anchor_residue_pass = False
elif mutation["{} WT IC50 Score".format(self.wt_top_score_metric)] < binding_threshold:
anchor_residue_pass = False
elif position != "NA":
else:
if int(float(position)) in anchors:
if pd.isna(mutation["{} WT IC50 Score".format(self.wt_top_score_metric)]):
anchor_residue_pass = False
Expand Down Expand Up @@ -571,7 +571,7 @@ def get_good_binders_metrics(self, good_binders, prediction_algorithms, el_algor
individual_el_calls[peptide_type] = el_calls
individual_el_percentile_calls[peptide_type] = el_percentile_calls
results[peptide]['hla_types'] = sorted(self.hla_types)
results[peptide]['mutation_position'] = str(good_binders_peptide_annotation.iloc[0]['Mutation Position'])
results[peptide]['mutation_position'] = "NA" if pd.isna(good_binders_peptide_annotation.iloc[0]['Mutation Position']) else str(good_binders_peptide_annotation.iloc[0]['Mutation Position'])
results[peptide]['problematic_positions'] = str(good_binders_peptide_annotation.iloc[0]['Problematic Positions']) if 'Problematic Positions' in good_binders_peptide_annotation.iloc[0] else 'None'
if len(anchor_fails) > 0:
results[peptide]['anchor_fails'] = ', '.join(anchor_fails)
Expand Down
26 changes: 26 additions & 0 deletions tests/test_aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,32 @@ def test_aggregate_all_epitopes_HCC1395_pvacseq_runs_and_produces_expected_outpu
self.assertTrue(os.path.isfile(pvacview_file))
os.remove(pvacview_file)

def test_aggregate_all_epitopes_pvacseq_na_mutation_position_runs_and_produces_expected_output(self):
    """Aggregate an input with missing Mutation Position values.

    Runs the pVACseq aggregator on the ``na_mutation_position`` fixture,
    compares the aggregated TSV and the metrics JSON against the expected
    files, then checks for (and removes) the pVACview files written next
    to the output.
    """
    self.assertTrue(py_compile.compile(self.executable))

    output_file = tempfile.NamedTemporaryFile(suffix='.tsv')
    input_path = os.path.join(self.test_data_dir, 'Test.all_epitopes.na_mutation_position.tsv')
    aggregator = PvacseqAggregateAllEpitopes(input_path, output_file.name)
    self.assertFalse(aggregator.execute())

    # Aggregated report must match the stored expectation.
    expected_tsv = os.path.join(self.test_data_dir, "output.na_mutation_position.tsv")
    self.assertTrue(cmp(output_file.name, expected_tsv))

    # The metrics file is written alongside the report.
    metrics_file = output_file.name.replace('.tsv', '.metrics.json')
    expected_metrics = os.path.join(self.test_data_dir, "output.na_mutation_position.metrics.json")
    self.assertTrue(cmp(metrics_file, expected_metrics))
    os.remove(metrics_file)

    # pVACview application files first, then its static assets under www/.
    output_dir = os.path.dirname(output_file.name)
    pvacview_files = [
        os.path.join(output_dir, script)
        for script in ("ui.R", "app.R", "server.R", "styling.R", "anchor_and_helper_functions.R")
    ]
    pvacview_files += [
        os.path.join(output_dir, "www", asset)
        for asset in ("anchor.jpg", "pVACview_logo.png", "pVACview_logo_mini.png")
    ]
    for pvacview_file in pvacview_files:
        self.assertTrue(os.path.isfile(pvacview_file))
        os.remove(pvacview_file)

def test_aggregate_all_epitopes_pvacfuse_runs_and_produces_expected_output(self):
self.assertTrue(py_compile.compile(self.executable))
output_file = tempfile.NamedTemporaryFile(suffix='.tsv')
Expand Down
Loading

0 comments on commit ae1f135

Please sign in to comment.