From bfeb2a623944cc4502cca2326443d09f0bdc03f3 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 14:04:07 -0700 Subject: [PATCH 1/7] add bugfix changes to summary --- src/mavis/summary/summary.py | 7 +++-- tests/data/mock_dgv_annotation.txt | 2 ++ tests/test_mavis/summary/test_summary.py | 40 ++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py index 4c022e97..875b2e37 100644 --- a/src/mavis/summary/summary.py +++ b/src/mavis/summary/summary.py @@ -210,10 +210,11 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): for bpp in [ b for b in bpps if not b.interchromosomal and b.break1.chr in dgv_regions_by_reference_name ]: + bpp.data['dgv'] = [] for dgv_region in dgv_regions_by_reference_name[bpp.break1.chr]: dist = abs(Interval.dist(Interval(dgv_region.start), bpp.break1)) if dist > lowest_resolution + distance: - break + continue elif ( dist > distance or abs(Interval.dist(Interval(dgv_region.end), bpp.break2)) > distance @@ -224,8 +225,8 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): refname = dgv_region.reference_object.name except AttributeError: pass - bpp.data['dgv'] = '{}({}:{}-{})'.format( - dgv_region.name, refname, dgv_region.start, dgv_region.end + bpp.data['dgv'].append( + '{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end) ) diff --git a/tests/data/mock_dgv_annotation.txt b/tests/data/mock_dgv_annotation.txt index ee303c99..bda6925c 100644 --- a/tests/data/mock_dgv_annotation.txt +++ b/tests/data/mock_dgv_annotation.txt @@ -1,6 +1,8 @@ chr start end name 1 1 2300000 nsv482937 1 10001 22118 dgv1n82 +1 10001 22120 rgv2n98 +1 10001 22221 rgv2n99 1 10001 127330 nsv7879 1 10191 10281 nsv958854 1 10377 177417 nsv428112 diff --git a/tests/test_mavis/summary/test_summary.py b/tests/test_mavis/summary/test_summary.py index b66a8134..a23d7b26 100644 --- a/tests/test_mavis/summary/test_summary.py +++ b/tests/test_mavis/summary/test_summary.py @@ -1,9 +1,10 @@ import pytest from mavis.breakpoint import Breakpoint, BreakpointPair from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE -from mavis.summary.summary import filter_by_annotations +from mavis.summary.summary import filter_by_annotations, annotate_dgv +from mavis.annotate.file_io import load_masking_regions -from ...util import todo +from ...util import todo, get_data @pytest.fixture @@ -40,11 +41,34 @@ def genomic_event2(): ) +@pytest.fixture +def genomic_event3(): + return BreakpointPair( + Breakpoint('1', 10001), + Breakpoint('1', 22118), + opposing_strands=True, + **{ + COLUMNS.event_type: SVTYPE.DEL, + COLUMNS.call_method: CALL_METHOD.CONTIG, + COLUMNS.fusion_sequence_fasta_id: None, + COLUMNS.protocol: PROTOCOL.GENOME, + COLUMNS.fusion_cdna_coding_start: None, + COLUMNS.fusion_cdna_coding_end: None, + COLUMNS.tracking_id: "genomic_event3", + } + ) + + @pytest.fixture def best_transcripts(): return {'ABCA': True, 'ABCD': True} +@pytest.fixture +def dgv_event2(): + return load_masking_regions(get_data("mock_dgv_annotation.txt")) + + class TestFilterByAnnotations: def test_filter_by_annotations_two_best_transcripts( self, genomic_event1, genomic_event2, best_transcripts @@ -174,3 +198,15 @@ def test_filtering_events_split(self): @todo def test_get_pairing_state(self): pass + + +class TestFilterByCallMethod: + def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2): + bpps = [genomic_event3] + annotate_dgv(bpps, dgv_event2, 103) + print(bpps[0].data) + assert len(bpps[0].data['dgv']) == 3 + assert ( + bpps[0].data + == "['dgv1n82(1:10001-22118)', 'rgv2n98(1:10001-22120)', 'rgv2n99(1:10001-22221)']" + ) From 9714dd932a4cf9e1348a6779c446bd10dd3b9b76 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 14:20:42 -0700 Subject: [PATCH 2/7] fix test syntax --- tests/test_mavis/summary/test_summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_mavis/summary/test_summary.py b/tests/test_mavis/summary/test_summary.py index a23d7b26..fd75cb60 100644 --- a/tests/test_mavis/summary/test_summary.py +++ b/tests/test_mavis/summary/test_summary.py @@ -207,6 +207,6 @@ def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2): print(bpps[0].data) assert len(bpps[0].data['dgv']) == 3 assert ( - bpps[0].data - == "['dgv1n82(1:10001-22118)', 'rgv2n98(1:10001-22120)', 'rgv2n99(1:10001-22221)']" + bpps[0].data['dgv'] + == ['dgv1n82(1:10001-22118)', 'rgv2n98(1:10001-22120)', 'rgv2n99(1:10001-22221)'] ) From f8b80c9bdf0e7eafa6706815dff056eb603e42af Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 14:37:18 -0700 Subject: [PATCH 3/7] freeze tabulate version --- .github/workflows/build.yml | 1 + setup.cfg | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b9263f86..f2d3c665 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -96,6 +96,7 @@ jobs: run: | python -m pip install --upgrade pip setuptools wheel pip install mavis_config pandas snakemake + pip install tabulate==0.8.9 - uses: eWaterCycle/setup-singularity@v6 with: singularity-version: 3.6.4 diff --git a/setup.cfg b/setup.cfg index 4149c69c..3ed5680a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,6 +45,7 @@ install_requires = Shapely>=1.6.4.post1 shortuuid>=0.5.0 svgwrite + tabulate==0.8.9 typing_extensions>=4 setup_requires = pip>=9.0.0 From d93caf78d3073859f1ae8544955005a713ba6cf7 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 14:50:23 -0700 Subject: [PATCH 4/7] reformat dgv output to string --- src/mavis/summary/summary.py | 3 ++- tests/test_mavis/summary/test_summary.py | 7 +------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py index 875b2e37..cd13de5b 100644 --- a/src/mavis/summary/summary.py +++ b/src/mavis/summary/summary.py @@ -228,7 +228,8 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): bpp.data['dgv'].append( '{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end) ) - + bpp.data['dgv'] = ','.join(bpp.data['dgv']) + print(bpp.data['dgv']) def get_pairing_state( current_protocol, diff --git a/tests/test_mavis/summary/test_summary.py b/tests/test_mavis/summary/test_summary.py index fd75cb60..f60ec1c4 100644 --- a/tests/test_mavis/summary/test_summary.py +++ b/tests/test_mavis/summary/test_summary.py @@ -204,9 +204,4 @@ class TestFilterByCallMethod: def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2): bpps = [genomic_event3] annotate_dgv(bpps, dgv_event2, 103) - print(bpps[0].data) - assert len(bpps[0].data['dgv']) == 3 - assert ( - bpps[0].data['dgv'] - == ['dgv1n82(1:10001-22118)', 'rgv2n98(1:10001-22120)', 'rgv2n99(1:10001-22221)'] - ) + assert len(bpps[0].data['dgv'].split(',')) == 3 From 938c243353653912fa09599ee0aa04e0d48bbcd9 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 14:51:37 -0700 Subject: [PATCH 5/7] get rid of print --- src/mavis/summary/summary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py index cd13de5b..1494111a 100644 --- a/src/mavis/summary/summary.py +++ b/src/mavis/summary/summary.py @@ -229,7 +229,6 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): '{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end) ) bpp.data['dgv'] = ','.join(bpp.data['dgv']) - print(bpp.data['dgv']) def get_pairing_state( current_protocol, From 092f690abb93653d5cbf26c21d0667cfe0008e38 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 15:08:28 -0700 Subject: [PATCH 6/7] reformat --- src/mavis/summary/summary.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py index 1494111a..6016e517 100644 --- a/src/mavis/summary/summary.py +++ b/src/mavis/summary/summary.py @@ -230,6 +230,7 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): ) bpp.data['dgv'] = ','.join(bpp.data['dgv']) + def get_pairing_state( current_protocol, current_disease_state, From 8b932a6dac52d809465cfb5ea9a8a8c5d8dce660 Mon Sep 17 00:00:00 2001 From: Jeremy Fan Date: Tue, 1 Nov 2022 16:26:03 -0700 Subject: [PATCH 7/7] change delimiter --- src/mavis/summary/summary.py | 2 +- tests/test_mavis/summary/test_summary.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py index 6016e517..a52711cd 100644 --- a/src/mavis/summary/summary.py +++ b/src/mavis/summary/summary.py @@ -228,7 +228,7 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0): bpp.data['dgv'].append( '{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end) ) - bpp.data['dgv'] = ','.join(bpp.data['dgv']) + bpp.data['dgv'] = ';'.join(bpp.data['dgv']) def get_pairing_state( diff --git a/tests/test_mavis/summary/test_summary.py b/tests/test_mavis/summary/test_summary.py index f60ec1c4..50b2318a 100644 --- a/tests/test_mavis/summary/test_summary.py +++ b/tests/test_mavis/summary/test_summary.py @@ -204,4 +204,4 @@ class TestFilterByCallMethod: def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2): bpps = [genomic_event3] annotate_dgv(bpps, dgv_event2, 103) - assert len(bpps[0].data['dgv'].split(',')) == 3 + assert len(bpps[0].data['dgv'].split(';')) == 3