Skip to content

Commit

Permalink
SQANTI filters (MatthiasLienhard#8)
Browse files Browse the repository at this point in the history
* Added types, variable renames, refactored functions

Type hints improve code readability and auto-completion in IDEs. I doubt that they are complete yet; some are specifically marked as "Any" because I wasn't certain.
Renamed abbreviated variables to increase code readability. Follow-up to the previous commits.
Refactored `has_overlap` and `get_intersects` in _utils.py. Especially the latter one was unnecessarily complex.

* version number

* Types, error handling, variable names, splice_bubbles tweaked

moved from numeric type ids in _find_splice_bubbles_at_position to readable strings
  TSS and PAS are broken for this one I think, left two TODO comments

* SQANTI support import, new filters, filtered coordination test

more types

* Changed TSS/PAS events from 1 vs All to 1 vs 1

* Fix gene track without annotation

* Fix swapped 3' and 5' fragment

* Typos, variable renames and removing unused code

* Quote type
  • Loading branch information
IceFreez3r authored Oct 4, 2024
1 parent 584e282 commit 1c6cd0a
Show file tree
Hide file tree
Showing 11 changed files with 550 additions and 358 deletions.
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.5_rc10
0.3.5_rc11
2 changes: 1 addition & 1 deletion src/isotools/_gene_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def gene_track(self, ax=None, title=None, reference=True, select_transcripts=Non
blocked[i] = transcript_end

# use SQANTI color palette if colorbySqanti is True
if colorbySqanti:
if colorbySqanti and 'annotation' in transcript:
color = sqanti_palette[transcript['annotation'][0]]['color']

# line from TSS to PAS at 0.25
Expand Down
19 changes: 14 additions & 5 deletions src/isotools/_transcriptome_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
import logging
import re
from ._utils import _filter_function, DEFAULT_KOZAK_PWM
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .transcriptome import Transcriptome
from .gene import Gene

logger = logging.getLogger('isotools')
BOOL_OP = {'and', 'or', 'not', 'is'}
Expand Down Expand Up @@ -30,6 +35,10 @@
'PERMISSIVE': 'gene.coverage.sum(0)[transcript_id] >= 2 and (FSM or not (RTTS or INTERNAL_PRIMING or FRAGMENT))',
'BALANCED': 'gene.coverage.sum(0)[transcript_id] >= 2 and (FSM or (HIGH_COVER and not (RTTS or FRAGMENT or INTERNAL_PRIMING)))',
'STRICT': 'gene.coverage.sum(0)[transcript_id] >= 7 and SUBSTANTIAL and (FSM or not (RTTS or FRAGMENT or INTERNAL_PRIMING))',
'CAGE_SUPPORT': 'sqanti_classification is not None and sqanti_classification["within_CAGE_peak"]',
'TSS_RATIO': 'sqanti_classification is not None and sqanti_classification["ratio_TSS"] > 1.5',
'POLYA_MOTIF': 'sqanti_classification is not None and sqanti_classification["polyA_motif_found"]',
'POLYA_SITE': 'sqanti_classification is not None and sqanti_classification["within_polyA_site"]',
}

SPLICE_CATEGORY = ['FSM', 'ISM', 'NIC', 'NNC', 'NOVEL']
Expand Down Expand Up @@ -92,7 +101,7 @@ def add_orf_prediction(self, genome_fn, progress_bar=True, filter_transcripts={}
get_fickett=fickett_score, kozak_matrix=kozak_matrix, coding_hexamers=coding, noncoding_hexamers=noncoding)


def add_qc_metrics(self, genome_fn, progress_bar=True, downstream_a_len=30, direct_repeat_wd=15, direct_repeat_wobble=2, direct_repeat_mm=2,
def add_qc_metrics(self: 'Transcriptome', genome_fn: str, progress_bar=True, downstream_a_len=30, direct_repeat_wd=15, direct_repeat_wobble=2, direct_repeat_mm=2,
unify_ends=True):
''' Retrieves QC metrics for the transcripts.
Expand Down Expand Up @@ -186,7 +195,7 @@ def add_filter(self, tag, expression, context='transcript', update=False):
self.filter[context][tag] = expression


def iter_genes(self, region=None, query=None, min_coverage=None, max_coverage=None, gois=None, progress_bar=False):
def iter_genes(self: 'Transcriptome', region=None, query=None, min_coverage=None, max_coverage=None, gois=None, progress_bar=False):
'''Iterates over the genes of a region, optionally applying filters.
:param region: The region to be considered. Either a string "chr:start-end", or a tuple (chr, start, end). Start and end is optional.
Expand Down Expand Up @@ -249,7 +258,7 @@ def iter_genes(self, region=None, query=None, min_coverage=None, max_coverage=No
yield gene


def iter_transcripts(self, region=None, query=None, min_coverage=None, max_coverage=None, genewise=False, gois=None, progress_bar=False):
def iter_transcripts(self: 'Transcriptome', region=None, query=None, min_coverage=None, max_coverage=None, genewise=False, gois=None, progress_bar=False):
'''Iterates over the transcripts of a region, optionally applying filters.
By default, each iteration returns a 3 Tuple with the gene object, the transcript number and the transcript dictionary.
Expand Down Expand Up @@ -297,7 +306,7 @@ def iter_transcripts(self, region=None, query=None, min_coverage=None, max_cover
yield gene, i, transcript


def iter_ref_transcripts(self, region=None, query=None, genewise=False, gois=None, progress_bar=False):
def iter_ref_transcripts(self: 'Transcriptome', region=None, query=None, genewise=False, gois=None, progress_bar=False):
'''Iterates over the reference transcripts of a region, optionally applying filters.
:param region: The region to be considered. Either a string "chr:start-end", or a tuple (chr,start,end). Start and end is optional.
Expand Down Expand Up @@ -352,7 +361,7 @@ def _eval_filter_fun(fun, name, **args):
# return False #or continue


def _filter_transcripts(gene, transcripts, query_fun, filter_fun, g_filter_eval, mincoverage=None, maxcoverage=None):
def _filter_transcripts(gene: 'Gene', transcripts, query_fun, filter_fun, g_filter_eval, mincoverage=None, maxcoverage=None):
''' Iterator over the transcripts of the gene.
Transcripts are specified by lists of flags submitted to the parameters.
Expand Down
Loading

0 comments on commit 1c6cd0a

Please sign in to comment.