Report every matching DGV region for a breakpoint pair instead of a single one.

Summary of the changes carried by this patch:

  * src/mavis/summary/summary.py (annotate_dgv)
      - A DGV region whose start lies too far from the first breakpoint is now
        skipped with `continue` rather than ending the scan with `break`, so
        the remaining regions on that chromosome are still examined.
      - Every region that passes the distance checks is collected in a local
        list and written to bpp.data['dgv'] as one ';'-separated string.
      - NOTE(review): a pair that is scanned but matches no region now gets
        bpp.data['dgv'] == '' (empty string); before this patch the key was
        left untouched. Confirm that downstream consumers accept that.
  * tests/data/mock_dgv_annotation.txt
      - Adds two regions (rgv2n98, rgv2n99) that share a start coordinate with
        dgv1n82 and differ only in their end coordinate.
  * tests/test_mavis/summary/test_summary.py
      - Adds the genomic_event3 and dgv_event2 fixtures and a test asserting
        that annotate_dgv(..., 103) reports three regions for that event.
  * setup.cfg, .github/workflows/build.yml
      - Pin tabulate==0.8.9 in install_requires and in the workflow's
        dependency install step.

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b9263f86..f2d3c665 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -96,6 +96,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip setuptools wheel
           pip install mavis_config pandas snakemake
+          pip install tabulate==0.8.9
       - uses: eWaterCycle/setup-singularity@v6
         with:
           singularity-version: 3.6.4
diff --git a/setup.cfg b/setup.cfg
index 4149c69c..3ed5680a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -45,6 +45,7 @@ install_requires =
     Shapely>=1.6.4.post1
     shortuuid>=0.5.0
     svgwrite
+    tabulate==0.8.9
     typing_extensions>=4
 setup_requires =
     pip>=9.0.0
diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py
index 4c022e97..a52711cd 100644
--- a/src/mavis/summary/summary.py
+++ b/src/mavis/summary/summary.py
@@ -210,10 +210,11 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
     for bpp in [
         b for b in bpps if not b.interchromosomal and b.break1.chr in dgv_regions_by_reference_name
     ]:
+        dgv_hits = []
         for dgv_region in dgv_regions_by_reference_name[bpp.break1.chr]:
             dist = abs(Interval.dist(Interval(dgv_region.start), bpp.break1))
             if dist > lowest_resolution + distance:
-                break
+                continue
             elif (
                 dist > distance
                 or abs(Interval.dist(Interval(dgv_region.end), bpp.break2)) > distance
@@ -224,9 +225,10 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
                 refname = dgv_region.reference_object.name
             except AttributeError:
                 pass
-            bpp.data['dgv'] = '{}({}:{}-{})'.format(
-                dgv_region.name, refname, dgv_region.start, dgv_region.end
+            dgv_hits.append(
+                '{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end)
             )
+        bpp.data['dgv'] = ';'.join(dgv_hits)
 
 
 def get_pairing_state(
diff --git a/tests/data/mock_dgv_annotation.txt b/tests/data/mock_dgv_annotation.txt
index ee303c99..bda6925c 100644
--- a/tests/data/mock_dgv_annotation.txt
+++ b/tests/data/mock_dgv_annotation.txt
@@ -1,6 +1,8 @@
 chr	start	end	name
 1	1	2300000	nsv482937
 1	10001	22118	dgv1n82
+1	10001	22120	rgv2n98
+1	10001	22221	rgv2n99
 1	10001	127330	nsv7879
 1	10191	10281	nsv958854
 1	10377	177417	nsv428112
diff --git a/tests/test_mavis/summary/test_summary.py b/tests/test_mavis/summary/test_summary.py
index b66a8134..50b2318a 100644
--- a/tests/test_mavis/summary/test_summary.py
+++ b/tests/test_mavis/summary/test_summary.py
@@ -1,9 +1,10 @@
 import pytest
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE
-from mavis.summary.summary import filter_by_annotations
+from mavis.summary.summary import filter_by_annotations, annotate_dgv
+from mavis.annotate.file_io import load_masking_regions
 
-from ...util import todo
+from ...util import todo, get_data
 
 
 @pytest.fixture
@@ -40,11 +41,34 @@ def genomic_event2():
     )
 
 
+@pytest.fixture
+def genomic_event3():
+    return BreakpointPair(
+        Breakpoint('1', 10001),
+        Breakpoint('1', 22118),
+        opposing_strands=True,
+        **{
+            COLUMNS.event_type: SVTYPE.DEL,
+            COLUMNS.call_method: CALL_METHOD.CONTIG,
+            COLUMNS.fusion_sequence_fasta_id: None,
+            COLUMNS.protocol: PROTOCOL.GENOME,
+            COLUMNS.fusion_cdna_coding_start: None,
+            COLUMNS.fusion_cdna_coding_end: None,
+            COLUMNS.tracking_id: "genomic_event3",
+        }
+    )
+
+
 @pytest.fixture
 def best_transcripts():
     return {'ABCA': True, 'ABCD': True}
 
 
+@pytest.fixture
+def dgv_event2():
+    return load_masking_regions(get_data("mock_dgv_annotation.txt"))
+
+
 class TestFilterByAnnotations:
     def test_filter_by_annotations_two_best_transcripts(
         self, genomic_event1, genomic_event2, best_transcripts
@@ -174,3 +198,10 @@ def test_filtering_events_split(self):
     @todo
     def test_get_pairing_state(self):
         pass
+
+
+class TestAnnotateDgv:
+    def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2):
+        bpps = [genomic_event3]
+        annotate_dgv(bpps, dgv_event2, 103)
+        assert len(bpps[0].data['dgv'].split(';')) == 3