Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix to Summary module clustering output #337

Merged
merged 7 commits into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
run: |
python -m pip install --upgrade pip setuptools wheel
pip install mavis_config pandas snakemake
pip install tabulate==0.8.9
- uses: eWaterCycle/setup-singularity@v6
with:
singularity-version: 3.6.4
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ install_requires =
Shapely>=1.6.4.post1
shortuuid>=0.5.0
svgwrite
tabulate==0.8.9
typing_extensions>=4
setup_requires =
pip>=9.0.0
Expand Down
7 changes: 4 additions & 3 deletions src/mavis/summary/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,11 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
for bpp in [
b for b in bpps if not b.interchromosomal and b.break1.chr in dgv_regions_by_reference_name
]:
bpp.data['dgv'] = []
for dgv_region in dgv_regions_by_reference_name[bpp.break1.chr]:
dist = abs(Interval.dist(Interval(dgv_region.start), bpp.break1))
if dist > lowest_resolution + distance:
break
continue
elif (
dist > distance
or abs(Interval.dist(Interval(dgv_region.end), bpp.break2)) > distance
Expand All @@ -224,8 +225,8 @@ def annotate_dgv(bpps, dgv_regions_by_reference_name, distance=0):
refname = dgv_region.reference_object.name
except AttributeError:
pass
bpp.data['dgv'] = '{}({}:{}-{})'.format(
dgv_region.name, refname, dgv_region.start, dgv_region.end
bpp.data['dgv'].append(
'{}({}:{}-{})'.format(dgv_region.name, refname, dgv_region.start, dgv_region.end)
)

zhemingfan marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
2 changes: 2 additions & 0 deletions tests/data/mock_dgv_annotation.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
chr start end name
1 1 2300000 nsv482937
1 10001 22118 dgv1n82
1 10001 22120 rgv2n98
1 10001 22221 rgv2n99
1 10001 127330 nsv7879
1 10191 10281 nsv958854
1 10377 177417 nsv428112
Expand Down
40 changes: 38 additions & 2 deletions tests/test_mavis/summary/test_summary.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pytest
from mavis.breakpoint import Breakpoint, BreakpointPair
from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE
from mavis.summary.summary import filter_by_annotations
from mavis.summary.summary import filter_by_annotations, annotate_dgv
from mavis.annotate.file_io import load_masking_regions

from ...util import todo
from ...util import todo, get_data


@pytest.fixture
Expand Down Expand Up @@ -40,11 +41,34 @@ def genomic_event2():
)


@pytest.fixture
def genomic_event3():
    """Return a genome-protocol deletion call on chromosome 1 (10001 / 22118).

    The two breakpoints coincide with the ``dgv1n82`` row (1:10001-22118) of
    ``tests/data/mock_dgv_annotation.txt``, so this event can be used to
    exercise DGV annotation against that mock table.
    """
    return BreakpointPair(
        Breakpoint('1', 10001),
        Breakpoint('1', 22118),
        opposing_strands=True,
        **{
            COLUMNS.event_type: SVTYPE.DEL,
            COLUMNS.call_method: CALL_METHOD.CONTIG,
            COLUMNS.fusion_sequence_fasta_id: None,
            COLUMNS.protocol: PROTOCOL.GENOME,
            COLUMNS.fusion_cdna_coding_start: None,
            COLUMNS.fusion_cdna_coding_end: None,
            COLUMNS.tracking_id: "genomic_event3",
        }
    )


@pytest.fixture
def best_transcripts():
    """Return a dict marking the transcript names 'ABCA' and 'ABCD' as True."""
    return {'ABCA': True, 'ABCD': True}


@pytest.fixture
def dgv_event2():
    """Load the mock DGV annotation table with ``load_masking_regions``.

    The result is passed to ``annotate_dgv`` as its
    ``dgv_regions_by_reference_name`` argument.
    """
    # NOTE(review): presumably a mapping of reference name -> list of regions;
    # confirm against load_masking_regions.
    return load_masking_regions(get_data("mock_dgv_annotation.txt"))


class TestFilterByAnnotations:
def test_filter_by_annotations_two_best_transcripts(
self, genomic_event1, genomic_event2, best_transcripts
Expand Down Expand Up @@ -174,3 +198,15 @@ def test_filtering_events_split(self):
@todo
def test_get_pairing_state(self):
pass


class TestFilterByCallMethod:
    # NOTE(review): the class name refers to call-method filtering but the
    # only test here covers annotate_dgv; consider renaming in a follow-up.

    def test_annotate_dgv_distance_bed(self, genomic_event3, dgv_event2):
        """Every DGV region within the distance window is recorded, in order."""
        bpps = [genomic_event3]
        # 103 is the gap between the event's second breakpoint (22118) and the
        # end of the farthest expected region, rgv2n99 (22221); a region is
        # rejected only when its offset is strictly greater than the distance.
        annotate_dgv(bpps, dgv_event2, 103)
        assert len(bpps[0].data['dgv']) == 3
        assert (
            bpps[0].data['dgv']
            == ['dgv1n82(1:10001-22118)', 'rgv2n98(1:10001-22120)', 'rgv2n99(1:10001-22221)']
        )