Skip to content

Commit

Permalink
Merge pull request #107 from yhoogstrate/additional_filter_intercept
Browse files Browse the repository at this point in the history
Adds a new type of filter that reduces the false-positive (FP) ratio
  • Loading branch information
yhoogstrate authored Jan 25, 2018
2 parents 1a201c3 + be13f87 commit 912d77a
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 58 deletions.
3 changes: 3 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2018-01-25 Youri Hoogstrate v0.15.1
* Added lr/intercept filter (flags entries whose lr-A or lr-B intercept exceeds a score-dependent maximum)

2018-01-09 Youri Hoogstrate v0.15.0
* Bugfix resulting in higher number of detected frame shifts
* `dr-disco integrate --fasta <fa file>` provides edit distance to
Expand Down
7 changes: 7 additions & 0 deletions drdisco/DetectOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,13 @@ def classify_intronic_exonic():
if chim_overhang < min_chim_overhang:
status.append("chim_overhang=" + str(chim_overhang) + "<" + str(min_chim_overhang))

# @todo move this lr-intercept filter into its own sub-function
lr_intercept_max = (-31.0 * ((e.score + 100.0) / (450.0 + e.score + 100.0))) + 85.25
if e.lr_A_intercept > lr_intercept_max:
status.append("lr_A_intercept=" + str(e.lr_A_intercept) + ">" + str(lr_intercept_max))
if e.lr_B_intercept > lr_intercept_max:
status.append("lr_B_intercept=" + str(e.lr_B_intercept) + ">" + str(lr_intercept_max))

if len(status) == 0:
e.status = 'valid'
fh.write(str(e))
Expand Down
2 changes: 1 addition & 1 deletion drdisco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import logging
import sys

__version_info__ = ('0', '15', '0')
__version_info__ = ('0', '15', '1')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/dr-disco'
Expand Down
6 changes: 6 additions & 0 deletions share/blacklist-regions.hg38.bed
Original file line number Diff line number Diff line change
Expand Up @@ -3476,3 +3476,9 @@ chr17 82079668 82079707 + poly-T
chr17 82079668 82079707 - poly-T
chr5 141989680 141989720 + poly-A
chr5 141989680 141989720 - poly-A
chr16 78758908 78758976 + poly-A
chr16 78758908 78758976 - poly-A

chr8:102,365,441-102,365,508
chr3:123,143,378-123,143,417

2 changes: 1 addition & 1 deletion tests/classify/test_11.out.dbed
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure
chr7 151970252 + 0 2 chr21 39777764 - 2 0 inf entropy=0.0<0.8394,n_support=1<7,n_lr_symmetry=112.87>=24.68 linear intronic 3 2 1 0 246 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 14.0000 0.0000 1.0000 0.0000 0.0000 112.0000 0.0000 1.0000 0.0000 0.0000 0.3333 2.0000 14 112 14 112 chr7:151970252/151970253(+)->chr21:39777764/39777765(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)
chr7 151970252 + 0 2 chr21 39777764 - 2 0 inf entropy=0.0<0.8394,n_support=1<7,n_lr_symmetry=112.87>=24.68,lr_B_intercept=112.0>79.476039783 linear intronic 3 2 1 0 246 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 14.0000 0.0000 1.0000 0.0000 0.0000 112.0000 0.0000 1.0000 0.0000 0.0000 0.3333 2.0000 14 112 14 112 chr7:151970252/151970253(+)->chr21:39777764/39777765(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)
2 changes: 1 addition & 1 deletion tests/classify/test_12.out.dbed
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure
chr21 39817544 - 4 0 chr21 42880007 + 0 4 3062463 entropy=0.0<0.8341,n_support=2<7,n_lr_symmetry=103.08>=17.71 linear intronic 6 4 2 0 496 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 100.0000 0.0000 1.0000 0.0000 0.0000 25.0000 0.0000 1.0000 0.0000 0.0000 0.3333 2.0000 100 25 100 25 chr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:2,spanning_paired_2:2)
chr21 39817544 - 4 0 chr21 42880007 + 0 4 3062463 entropy=0.0<0.8341,n_support=2<7,n_lr_symmetry=103.08>=17.71,lr_A_intercept=100.0>79.3399280576 linear intronic 6 4 2 0 496 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 100.0000 0.0000 1.0000 0.0000 0.0000 25.0000 0.0000 1.0000 0.0000 0.0000 0.3333 2.0000 100 25 100 25 chr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:2,spanning_paired_2:2)
chr21 39792184 - 6 0 chr21 42879912 + 0 6 3087728 n_support=3<8 linear intronic 3 0 0 3 726 0 3 3 1 2 0 0.0000 1.0000 0.0000 0.0000 0.0000 4.0000 0.0000 1.0000 0.0000 0.0000 4.0000 0.0000 1.0000 0.0000 60.0000 0.0000 1.3333 126 120 126 120 chr21:39792184/39792185(-)->chr21:42879912/42879913(+):(discordant_mates:2)&chr21:39793115/39793116(-)->chr21:42879912/42879913(+):(discordant_mates:2)&chr21:39795473/39795474(-)->chr21:42879912/42879913(+):(discordant_mates:2)
2 changes: 1 addition & 1 deletion tests/classify/test_13.out.dbed
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ chr21 40073871 - 18 0 chr21 42870045 + 0 18 2796174 valid linear intronic 26 103
chr21 40056045 - 14 0 chr21 42870045 + 0 14 2814000 valid linear intronic 20 92 6 2 1850 2 3 2 3 0 1 1.0000 1.0000 0.0000 0.4056 13.4857 46.9524 0.8602 0.0280 3.9977 12.8000 12.3333 0.8708 0.0240 3.6137 0.3333 2.3000 1.6667 94 31 126 126 chr21:40056045/40056046(-)->chr21:42870045/42870046(+):(discordant_mates:2,spanning_paired_1:5,spanning_paired_2:5)&chr21:40056045/40056046(-)->chr21:42880007/42880008(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:40055887/40055888(-)->chr21:42878380/42878381(+):(discordant_mates:2)
chr21 39817544 - 8 0 chr21 42878371 + 0 8 3060827 n_support=10<12,log_ratio_slope=13.2>1.8,log_ratio_rvalue=9.21>0.4 linear exonic 18 33 4 6 1886 1 5 5 4 1 0 1.0000 0.9398 0.0000 0.6395 54.0000 14.0000 1.0000 0.0000 0.0000 0.0000 41.0000 0.0000 1.0000 0.0000 1.5000 0.9167 1.8000 73 51 126 116 chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:40064721/40064722(-)->chr21:42878371/42878372(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817370/39817371(-)->chr21:42883791/42883792(+):(discordant_mates:8)&chr21:39817410/39817411(-)->chr21:42878907/42878908(+):(discordant_mates:2)&chr21:39817510/39817511(-)->chr21:42883787/42883788(+):(discordant_mates:2)
chr21 39850904 - 8 0 chr21 42877894 + 0 8 3026990 n_support=4<7 linear intronic 8 4 2 2 895 0 1 1 1 0 0 1.0000 1.0000 0.0000 1.0000 37.2000 23.7000 0.9039 0.0961 12.4491 16.3000 70.3000 0.9316 0.0684 4.4978 1.0000 0.2500 2.0000 35 90 126 126 chr21:39850904/39850905(-)->chr21:42877894/42877895(+):(discordant_mates:4,spanning_paired_1_t:2,spanning_paired_2_t:2)
chr21 39969017 - 2 0 chr21 42870045 + 0 2 2901028 n_support=2<8,n_lr_symmetry=105.32>=24.68 linear intronic 3 75 1 1 333 0 2 2 2 0 0 0.0000 1.0000 0.0000 0.0000 0.0000 103.0000 0.0000 1.0000 0.0000 0.0000 22.0000 0.0000 1.0000 0.0000 1.0000 12.5000 2.0000 103 22 124 91 chr21:39969017/39969018(-)->chr21:42870045/42870046(+):(spanning_singleton_1:1,spanning_singleton_2:1)&chr21:39968827/39968828(-)->chr21:42879876/42879877(+):(discordant_mates:2)
chr21 39969017 - 2 0 chr21 42870045 + 0 2 2901028 n_support=2<8,n_lr_symmetry=105.32>=24.68,lr_A_intercept=103.0>79.476039783 linear intronic 3 75 1 1 333 0 2 2 2 0 0 0.0000 1.0000 0.0000 0.0000 0.0000 103.0000 0.0000 1.0000 0.0000 0.0000 22.0000 0.0000 1.0000 0.0000 1.0000 12.5000 2.0000 103 22 124 91 chr21:39969017/39969018(-)->chr21:42870045/42870046(+):(spanning_singleton_1:1,spanning_singleton_2:1)&chr21:39968827/39968828(-)->chr21:42879876/42879877(+):(discordant_mates:2)
chr21 40103950 + 0 2 chr21 42869215 - 2 0 2765265 entropy=0.0<0.8394,n_support=1<7,n_lr_symmetry=73.25>=24.68 linear intronic 3 2 1 0 162 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 46.0000 0.0000 1.0000 0.0000 0.0000 57.0000 0.0000 1.0000 0.0000 0.0000 0.3333 2.0000 46 57 46 57 chr21:40103950/40103951(+)->chr21:42869215/42869216(-):(spanning_paired_1:1,spanning_paired_2:1)
4 changes: 2 additions & 2 deletions tests/classify/test_15.out.dbed
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads alignment-score mismatches n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge median-AS-A median-AS-B max-AS-A max-AS-B data-structure
chr21 39817544 - 4 0 chr21 42870045 + 0 4 3052501 n_support=2<7,log_ratio_slope=2.4>1.8 linear exonic 4 2 1 1 480 0 1 1 1 0 0 1.0000 1.0000 0.0000 1.0000 99.0000 24.0000 1.0000 0.0000 0.0000 9.0000 102.0000 1.0000 0.0000 0.0000 1.0000 0.2500 2.0000 24 102 123 111 chr21:39817544/39817545(-)->chr21:42870045/42870046(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_2:1)
chr21 39956806 - 4 0 chr21 42870051 + 0 4 2913245 entropy=0.0<0.841,n_support=2<7,n_lr_symmetry=176.07>=28.44 linear intronic 2 0 0 2 492 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 124.0000 0.0000 1.0000 0.0000 0.0000 125.0000 0.0000 1.0000 0.0000 40.0000 0.0000 2.0000 124 125 124 125 chr21:39956806/39956807(-)->chr21:42870051/42870052(+):(discordant_mates:4)
chr21 39817544 - 4 0 chr21 42870045 + 0 4 3052501 n_support=2<7,log_ratio_slope=2.4>1.8,lr_B_intercept=102.0>79.4305054152 linear exonic 4 2 1 1 480 0 1 1 1 0 0 1.0000 1.0000 0.0000 1.0000 99.0000 24.0000 1.0000 0.0000 0.0000 9.0000 102.0000 1.0000 0.0000 0.0000 1.0000 0.2500 2.0000 24 102 123 111 chr21:39817544/39817545(-)->chr21:42870045/42870046(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_2:1)
chr21 39956806 - 4 0 chr21 42870051 + 0 4 2913245 entropy=0.0<0.841,n_support=2<7,n_lr_symmetry=176.07>=28.44,lr_A_intercept=124.0>79.5217391304,lr_B_intercept=125.0>79.5217391304 linear intronic 2 0 0 2 492 0 1 1 1 0 0 0.0000 0.0000 0.0000 0.0000 0.0000 124.0000 0.0000 1.0000 0.0000 0.0000 125.0000 0.0000 1.0000 0.0000 40.0000 0.0000 2.0000 124 125 124 125 chr21:39956806/39956807(-)->chr21:42870051/42870052(+):(discordant_mates:4)
Loading

0 comments on commit 912d77a

Please sign in to comment.