Skip to content

Commit

Permalink
Merge pull request #337 from bcgsc/bugfix/MAVIS_summary_clustering
Browse files Browse the repository at this point in the history
Bugfix to Summary module clustering output
  • Loading branch information
zhemingfan authored Nov 1, 2022
2 parents 7e91a39 + 8b932a6 commit 8eb2929
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
run: |
python -m pip install --upgrade pip setuptools wheel
pip install mavis_config pandas snakemake
pip install tabulate==0.8.9
- uses: eWaterCycle/setup-singularity@v6
with:
singularity-version: 3.6.4
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ install_requires =
Shapely>=1.6.4.post1
shortuuid>=0.5.0
svgwrite
tabulate==0.8.9
typing_extensions>=4
setup_requires =
pip>=9.0.0
Expand Down
8 changes: 5 additions & 3 deletions src/mavis/summary/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,11 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
for bpp in [
b for b in bpps if not b.interchromosomal and b.break1.chr in dgv_regions_by_reference_name
]:
bpp.data['dgv'] = []
for dgv_region in dgv_regions_by_reference_name[bpp.break1.chr]:
dist = abs(Interval.dist(Interval(dgv_region.start), bpp.break1))
if dist > lowest_resolution + distance:
break
continue
elif (
dist > distance
or abs(Interval.dist(Interval(dgv_region.end), bpp.break2)) > distance
Expand All @@ -224,9 +225,10 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
refname = dgv_region.reference_object.name
except AttributeError:
pass
bpp.data['dgv'] = '{}({}:{}-{})'.format(
dgv_region.name, refname, dgv_region.start, dgv_region.end
bpp.data['dgv'].append(
'{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end)
)
bpp.data['dgv'] = ';'.join(bpp.data['dgv'])


def get_pairing_state(
Expand Down
2 changes: 2 additions & 0 deletions tests/data/mock_dgv_annotation.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
chr start end name
1 1 2300000 nsv482937
1 10001 22118 dgv1n82
1 10001 22120 rgv2n98
1 10001 22221 rgv2n99
1 10001 127330 nsv7879
1 10191 10281 nsv958854
1 10377 177417 nsv428112
Expand Down
35 changes: 33 additions & 2 deletions tests/test_mavis/summary/test_summary.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pytest
from mavis.breakpoint import Breakpoint, BreakpointPair
from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE
from mavis.summary.summary import filter_by_annotations
from mavis.summary.summary import filter_by_annotations, annotate_dgv
from mavis.annotate.file_io import load_masking_regions

from ...util import todo
from ...util import todo, get_data


@pytest.fixture
Expand Down Expand Up @@ -40,11 +41,34 @@ def genomic_event2():
)


@pytest.fixture
def genomic_event3():
    """Breakpoint pair on chromosome '1' (positions 10001 and 22118).

    The pair is created with ``opposing_strands=True`` and is labelled as a
    contig-called deletion from the genome protocol, with no fusion
    sequence or coding coordinates, under the tracking id
    ``genomic_event3``.
    """
    first_break = Breakpoint('1', 10001)
    second_break = Breakpoint('1', 22118)
    column_values = {
        COLUMNS.event_type: SVTYPE.DEL,
        COLUMNS.call_method: CALL_METHOD.CONTIG,
        COLUMNS.fusion_sequence_fasta_id: None,
        COLUMNS.protocol: PROTOCOL.GENOME,
        COLUMNS.fusion_cdna_coding_start: None,
        COLUMNS.fusion_cdna_coding_end: None,
        COLUMNS.tracking_id: "genomic_event3",
    }
    return BreakpointPair(first_break, second_break, opposing_strands=True, **column_values)


@pytest.fixture
def best_transcripts():
    """Mapping that flags the transcripts 'ABCA' and 'ABCD' as True."""
    return dict.fromkeys(('ABCA', 'ABCD'), True)


@pytest.fixture
def dgv_event2():
    """DGV regions loaded from the mock annotation table.

    Passes whatever ``get_data`` returns for ``mock_dgv_annotation.txt``
    (presumably the file's location under the test data directory -- confirm
    against ``tests/util``) to ``load_masking_regions`` and returns the
    result unchanged.
    """
    annotation_source = get_data("mock_dgv_annotation.txt")
    return load_masking_regions(annotation_source)


class TestFilterByAnnotations:
def test_filter_by_annotations_two_best_transcripts(
self, genomic_event1, genomic_event2, best_transcripts
Expand Down Expand Up @@ -174,3 +198,10 @@ def test_filtering_events_split(self):
@todo
def test_get_pairing_state(self):
pass


class TestFilterByCallMethod:
    def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2):
        """annotate_dgv reports every DGV region within the given distance.

        The event spans 1:10001-22118. The mock DGV table lists three
        regions starting at 10001 whose ends (22118, 22120, 22221) lie
        within 103 bases of the second breakpoint, so the ';'-joined
        annotation is expected to hold three entries.
        """
        events = [genomic_event3]
        annotate_dgv(events, dgv_event2, 103)
        dgv_entries = events[0].data['dgv'].split(';')
        assert len(dgv_entries) == 3

0 comments on commit 8eb2929

Please sign in to comment.