Skip to content

Commit

Permalink
Merge pull request #849 from uclahs-cds/czhu-fix-cyclic-import
Browse files Browse the repository at this point in the history
fix (moPepGen): cyclic-import
  • Loading branch information
zhuchcn authored Feb 28, 2024
2 parents b46ed19 + 0e8bab4 commit edf1aea
Show file tree
Hide file tree
Showing 20 changed files with 110 additions and 96 deletions.
3 changes: 1 addition & 2 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ disable=raw-checker-failed,
superfluous-parens,
unnecessary-lambda-assignment,
unnecessary-dunder-call,
unspecified-encoding,
cyclic-import
unspecified-encoding

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
11 changes: 7 additions & 4 deletions moPepGen/aa/PeptidePoolSplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
from typing import Dict, IO, List, Set, TYPE_CHECKING
from pathlib import Path
from moPepGen.seqvar import GVFMetadata
from moPepGen import seqvar, circ, VARIANT_PEPTIDE_SOURCE_DELIMITER, \
from moPepGen import seqvar, circ, constant, VARIANT_PEPTIDE_SOURCE_DELIMITER, \
SPLIT_DATABASE_KEY_SEPARATER
from .VariantPeptidePool import VariantPeptidePool
from .VariantPeptideLabel import VariantPeptideInfo, VariantSourceSet, \
LabelSourceMapping, SOURCE_NONCODING, SOURCE_CODON_REASSIGNMENT, \
SOURCE_SEC_TERMINATION
LabelSourceMapping

if TYPE_CHECKING:
from .AminoAcidSeqRecord import AminoAcidSeqRecord
Expand Down Expand Up @@ -55,7 +54,11 @@ def get_reversed_group_map(self) -> Dict[str, List[str]]:
def append_order_internal_sources(self):
""" Add internal sources that are not present in any GTFs, including
noncoding, sec termination, and codon reassignment. """
sources = [SOURCE_NONCODING, SOURCE_SEC_TERMINATION, SOURCE_CODON_REASSIGNMENT]
sources = [
constant.SOURCE_NONCODING,
constant.SOURCE_SEC_TERMINATION,
constant.SOURCE_CODON_REASSIGNMENT
]
for source in sources:
if source in self.group_map:
source = self.group_map[source]
Expand Down
10 changes: 6 additions & 4 deletions moPepGen/aa/PeptidePoolSummarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
import statistics
from typing import Dict, IO, List, Set, FrozenSet, Tuple, Optional
import matplotlib.pyplot as plt
from moPepGen import seqvar
from moPepGen import seqvar, constant
from moPepGen.aa.AminoAcidSeqRecord import AminoAcidSeqRecord
from moPepGen.aa.VariantPeptideLabel import VariantPeptideInfo, \
VariantSourceSet, LabelSourceMapping
from moPepGen.aa.VariantPeptidePool import VariantPeptidePool
from moPepGen.seqvar.GVFMetadata import GVFMetadata
from moPepGen.aa.VariantPeptideLabel import SOURCE_NONCODING, \
SOURCE_SEC_TERMINATION, SOURCE_CODON_REASSIGNMENT


SOURCES_INTERNAL = [SOURCE_NONCODING, SOURCE_SEC_TERMINATION, SOURCE_CODON_REASSIGNMENT]
SOURCES_INTERNAL = [
constant.SOURCE_NONCODING,
constant.SOURCE_SEC_TERMINATION,
constant.SOURCE_CODON_REASSIGNMENT
]

MUTUALLY_EXCLUSIVE_PARSERS:Dict[str,List[str]] = {
'parseSTARFusion': [
Expand Down
22 changes: 12 additions & 10 deletions moPepGen/aa/VariantPeptideLabel.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
""" module for peptide peptide labels """
from __future__ import annotations
from typing import Dict, Iterable, List, TYPE_CHECKING, Set
from moPepGen import err, seqvar, circ,SPLIT_DATABASE_KEY_SEPARATER
from moPepGen.seqvar import SEC_TERMINATION_TYPE, CODON_REASSIGNMENTS_TYPES
from moPepGen import err, seqvar, circ, constant, SPLIT_DATABASE_KEY_SEPARATER
from . import VariantPeptideIdentifier as pi

if TYPE_CHECKING:
from .AminoAcidSeqRecord import AminoAcidSeqRecord
from moPepGen.gtf import GenomicAnnotation

SOURCE_NONCODING = 'Noncoding'
SOURCE_CODON_REASSIGNMENT = 'CodonReassign'
SOURCE_SEC_TERMINATION = 'SECT'

class VariantSourceSet(set):
""" Variant source set. This is a class of ordered set.
Expand Down Expand Up @@ -208,14 +204,20 @@ def from_variant_peptide(peptide:AminoAcidSeqRecord,

if check_source:
if tx_id not in coding_tx:
info.sources.add(SOURCE_NONCODING, group_map=group_map)
info.sources.add(constant.SOURCE_NONCODING, group_map=group_map)

for gene_id, _ids in var_ids.items():
for var_id in _ids:
if var_id.split('-')[0] == SEC_TERMINATION_TYPE:
info.sources.add(SOURCE_SEC_TERMINATION, group_map=group_map)
elif var_id.split('-')[0] in CODON_REASSIGNMENTS_TYPES:
info.sources.add(SOURCE_CODON_REASSIGNMENT, group_map=group_map)
if var_id.split('-')[0] == constant.SEC_TERMINATION_TYPE:
info.sources.add(
constant.SOURCE_SEC_TERMINATION,
group_map=group_map
)
elif var_id.split('-')[0] in constant.CODON_REASSIGNMENTS_TYPES:
info.sources.add(
constant.SOURCE_CODON_REASSIGNMENT,
group_map=group_map
)
else:
source = label_map.get_source(gene_id, var_id)
info.sources.add(source, group_map=group_map)
Expand Down
13 changes: 13 additions & 0 deletions moPepGen/constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
""" moPepGen constants """
# ---- Variant type groupings ----

# Variant types whose REF and ALT are written out directly as sequence
# strings when a record is serialized.
SINGLE_NUCLEOTIDE_SUBSTITUTION = [
    'SNV',
    'SNP',
    'INDEL',
    'MNV',
    'RNAEditingSite',
]

# Attribute keys that hold positions. They are shifted to 1-based when a
# record's info string is built and back to 0-based when it is parsed.
ATTRS_POSITION = [
    'START',
    'DONOR_START',
    'ACCEPTER_START',
    'ACCEPTER_POSITION',
]

# Variant record types that count as alternative splicing.
ALTERNATIVE_SPLICING_TYPES = [
    'Insertion',
    'Deletion',
    'Substitution',
]

# Variant ID prefixes; a record whose ID starts with one of these is treated
# as an alternative splicing event.
RMATS_TYPES = [
    'SE',
    'RI',
    'A3SS',
    'A5SS',
    'MXE',
]

# Variant types that are codon reassignments.
CODON_REASSIGNMENTS_TYPES = [
    'W2F',
]

# Variant type (and variant ID prefix) for selenocysteine termination.
SEC_TERMINATION_TYPE = 'SECT'

# ---- Internal variant source labels ----

# Source label for peptides from transcripts that are not in the coding set.
SOURCE_NONCODING = 'Noncoding'
# Source label for peptides carrying a codon reassignment variant.
SOURCE_CODON_REASSIGNMENT = 'CodonReassign'
# Source label for peptides carrying a selenocysteine termination variant.
SOURCE_SEC_TERMINATION = 'SECT'
4 changes: 2 additions & 2 deletions moPepGen/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from moPepGen.gtf.GenomicAnnotation import GenomicAnnotation, GeneAnnotationModel, \
TranscriptAnnotationModel, GTFSeqFeature
from moPepGen.seqvar import VariantRecord
from moPepGen.seqvar.VariantRecord import ALTERNATIVE_SPLICING_TYPES
from moPepGen.circ import CircRNAModel
from moPepGen import constant


# pylint: disable=R0912, R0915
Expand Down Expand Up @@ -185,7 +185,7 @@ def fake_rmats_record(anno:GenomicAnnotation, genome:DNASeqDict, tx_id:str
) -> VariantRecord:
""" Create an alternative splicing variant """
while True:
var_type = random.choice(ALTERNATIVE_SPLICING_TYPES)
var_type = random.choice(constant.ALTERNATIVE_SPLICING_TYPES)
if var_type != 'Substitution' or len(anno.transcripts[tx_id].exon) > 3:
break
rmats_type = random.choice(ALT_SPLICE_TYPE_TO_RMATS[var_type])
Expand Down
5 changes: 2 additions & 3 deletions moPepGen/seqvar/GVFMetadata.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
""" Module for GVF metadata """
from __future__ import annotations
from typing import List, IO
from moPepGen import __version__
from moPepGen.seqvar import SINGLE_NUCLEOTIDE_SUBSTITUTION
from moPepGen import __version__, constant
from .GVFMetadataInfo import GVF_METADATA_INFO, GVF_METADATA_ADDITIONAL


Expand Down Expand Up @@ -51,7 +50,7 @@ def add_info(self, variant_type:str) -> None:
self.add_alt(variant_type)
if variant_type in self.added_types:
return
if variant_type in SINGLE_NUCLEOTIDE_SUBSTITUTION:
if variant_type in constant.SINGLE_NUCLEOTIDE_SUBSTITUTION:
return
if variant_type == 'Fusion':
self.info.update(GVF_METADATA_INFO['Fusion'])
Expand Down
15 changes: 7 additions & 8 deletions moPepGen/seqvar/SplicingJunction.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
""" Splicing junction site """
from __future__ import annotations
from typing import TYPE_CHECKING, List
from moPepGen import seqvar
from moPepGen.SeqFeature import FeatureLocation
from .VariantRecord import VariantRecord


if TYPE_CHECKING:
from moPepGen.gtf import TranscriptAnnotationModel, GenomicAnnotation
from moPepGen.dna import DNASeqRecord
from moPepGen.seqvar import VariantRecord

class SpliceJunction():
""" Represents a splice junction between two genomic positions. """
Expand Down Expand Up @@ -190,7 +189,7 @@ def create_upstream_deletion(self, spanning:int, interjacent:List[int],
'GENOMIC_POSITION': genomic_position
}
_type = 'Deletion'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def create_downstream_deletion(self, spanning:int, interjacent:List[int],
anno:GenomicAnnotation, gene_seq:DNASeqRecord, var_id:str
Expand Down Expand Up @@ -233,7 +232,7 @@ def create_downstream_deletion(self, spanning:int, interjacent:List[int],
'GENOMIC_POSITION': genomic_position
}
_type = 'Deletion'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def create_upstream_substitution(self, interjacent:List[int],
anno:GenomicAnnotation, gene_seq:DNASeqRecord, var_id:str
Expand Down Expand Up @@ -286,7 +285,7 @@ def create_upstream_substitution(self, interjacent:List[int],
'GENOMIC_POSITION': genomic_position
}
_type = 'Substitution'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def create_downstream_substitution(self, interjacent:List[int],
anno:GenomicAnnotation, gene_seq:DNASeqRecord, var_id:str
Expand Down Expand Up @@ -340,7 +339,7 @@ def create_downstream_substitution(self, interjacent:List[int],
'GENOMIC_POSITION': genomic_position
}
_type = 'Substitution'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def create_upstream_insertion(self, anno:GenomicAnnotation,
gene_seq:DNASeqRecord, var_id:str) -> VariantRecord:
Expand Down Expand Up @@ -391,7 +390,7 @@ def create_upstream_insertion(self, anno:GenomicAnnotation,
'GENOMIC_POSITION': genomic_position
}
_type = 'Insertion'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def create_downstream_insertion(self, anno:GenomicAnnotation,
gene_seq:DNASeqRecord, var_id:str) -> VariantRecord:
Expand Down Expand Up @@ -444,7 +443,7 @@ def create_downstream_insertion(self, anno:GenomicAnnotation,
'GENOMIC_POSITION': genomic_position
}
_type = 'Insertion'
return seqvar.VariantRecord(location, ref, alt, _type, var_id, attrs)
return VariantRecord(location, ref, alt, _type, var_id, attrs)

def convert_to_variant_records(self, anno:GenomicAnnotation,
gene_seq:DNASeqRecord, var_id:str):
Expand Down
22 changes: 8 additions & 14 deletions moPepGen/seqvar/VariantRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,20 @@
from __future__ import annotations
import copy
from typing import TYPE_CHECKING, Dict, Iterable, List
from moPepGen import ERROR_NO_TX_AVAILABLE, \
from moPepGen import constant, ERROR_NO_TX_AVAILABLE, \
ERROR_VARIANT_NOT_IN_GENE_COORDINATE, ERROR_INDEX_IN_INTRON, \
ERROR_REF_LENGTH_NOT_MATCH_WITH_LOCATION
from moPepGen.SeqFeature import FeatureLocation


_VARIANT_TYPES = ['SNV', 'INDEL', 'MNV', 'Fusion', 'RNAEditingSite',
'Insertion', 'Deletion', 'Substitution', 'circRNA', 'SECT', 'W2F']

# To avoid circular import
if TYPE_CHECKING:
from moPepGen.gtf import GenomicAnnotation
from moPepGen.dna import DNASeqDict, DNASeqRecord, DNASeqRecordWithCoordinates

_VARIANT_TYPES = ['SNV', 'INDEL', 'MNV', 'Fusion', 'RNAEditingSite',
'Insertion', 'Deletion', 'Substitution', 'circRNA', 'SECT', 'W2F']
SINGLE_NUCLEOTIDE_SUBSTITUTION = ['SNV', 'SNP', 'INDEL', 'MNV', 'RNAEditingSite']
ATTRS_POSITION = ['START', 'DONOR_START', 'ACCEPTER_START', 'ACCEPTER_POSITION']
ALTERNATIVE_SPLICING_TYPES = ['Insertion', 'Deletion', 'Substitution']
RMATS_TYPES = ['SE', 'RI', 'A3SS', 'A5SS', 'MXE']
CODON_REASSIGNMENTS_TYPES = ['W2F']
SEC_TERMINATION_TYPE = 'SECT'


def create_variant_sect(anno:GenomicAnnotation, tx_id:str, pos:int) -> VariantRecord:
""" Create a VariantRecord for Selenocysteine Termination. """
Expand Down Expand Up @@ -325,7 +319,7 @@ def to_string(self) -> str:
qual = '.'
_filter = '.'

if self.type in SINGLE_NUCLEOTIDE_SUBSTITUTION:
if self.type in constant.SINGLE_NUCLEOTIDE_SUBSTITUTION:
ref = str(self.ref)
alt = str(self.alt)
elif self.type == 'Fusion':
Expand All @@ -347,7 +341,7 @@ def info(self) -> str:
out = ''
for key,val in self.attrs.items():
# using 1-base position
if key in ATTRS_POSITION:
if key in constant.ATTRS_POSITION:
val = str(int(val) + 1)
elif isinstance(val, list):
val = ','.join([str(x) for x in val])
Expand Down Expand Up @@ -380,11 +374,11 @@ def is_fusion(self) -> bool:

def is_alternative_splicing(self) -> bool:
""" Check if this is an alternative splicing event """
return any(self.id.startswith(x) for x in RMATS_TYPES)
return any(self.id.startswith(x) for x in constant.RMATS_TYPES)

def is_codon_reassignment(self) -> bool:
""" Check if the variant is a codon reassignment """
return self.type in CODON_REASSIGNMENTS_TYPES
return self.type in constant.CODON_REASSIGNMENTS_TYPES

def is_merged_mnv(self) -> bool:
""" Check if the variant is a MNV merged from individual adjacent variants. """
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/seqvar/VariantRecordPool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import copy
from typing import Dict, IO, Iterable, List, TYPE_CHECKING, Union
from moPepGen import ERROR_INDEX_IN_INTRON, circ
from moPepGen.seqvar.VariantRecordPoolOnDisk import TranscriptionalVariantSeries
from . import VariantRecord, io, GVFMetadata
from .VariantRecordPoolOnDisk import TranscriptionalVariantSeries


# To avoid circular import
Expand Down
9 changes: 4 additions & 5 deletions moPepGen/seqvar/VariantRecordPoolOnDisk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import copy
from typing import Dict, IO, Iterable, List, TYPE_CHECKING, Union
from pathlib import Path
from moPepGen import ERROR_INDEX_IN_INTRON, check_sha512, circ
from moPepGen import ERROR_INDEX_IN_INTRON, check_sha512, circ, constant
from moPepGen.seqvar.GVFIndex import GVFPointer, iterate_pointer
from moPepGen.seqvar.GVFMetadata import GVFMetadata
from moPepGen.seqvar.VariantRecord import ALTERNATIVE_SPLICING_TYPES
from . import VariantRecord


Expand Down Expand Up @@ -66,19 +65,19 @@ def is_empty(self) -> bool:
def has_any_noncanonical_transcripts(self) -> bool:
""" check if the series has any noncanonical transcripts """
return len(self.fusion) > 0 or len(self.circ_rna) > 0 or \
any(x.type in ALTERNATIVE_SPLICING_TYPES for x in self.transcriptional)
any(x.type in constant.ALTERNATIVE_SPLICING_TYPES for x in self.transcriptional)

def has_any_alternative_splicing(self) -> bool:
""" Check if there is any alternative splicing """
return any(x.type in ALTERNATIVE_SPLICING_TYPES for x in self.transcriptional)
return any(x.type in constant.ALTERNATIVE_SPLICING_TYPES for x in self.transcriptional)

def get_highest_hypermutated_region_complexity(self, distance:int=6):
""" Calculate the number of variants in the most hypermutated region """
end = 0
max_n = 0
cur_n = 0
for variant in self.transcriptional:
if variant.type in ALTERNATIVE_SPLICING_TYPES:
if variant.type in constant.ALTERNATIVE_SPLICING_TYPES:
continue
if variant.location.start - end >= distance:
end = variant.location.end
Expand Down
9 changes: 6 additions & 3 deletions moPepGen/seqvar/VariantRecordWithCoordinate.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
""" Module for variant record with coordinate. """
from __future__ import annotations
from typing import TYPE_CHECKING
import math
from moPepGen import seqvar
from moPepGen.SeqFeature import FeatureLocation


if TYPE_CHECKING:
from moPepGen.seqvar import VariantRecord

class VariantRecordWithCoordinate():
""" This class models the variant record with its coordinate location at
a gene or protein. This is used mainly in the graph to keep track of the
location of variants of a node when the variable bubble expands forward
or backward. """
def __init__(self, variant:seqvar.VariantRecord, location:FeatureLocation,
def __init__(self, variant:VariantRecord, location:FeatureLocation,
is_stop_altering:bool=False, is_silent:bool=False,
downstream_cleavage_altering:bool=False,
upstream_cleavage_altering:bool=False):
Expand Down Expand Up @@ -51,7 +54,7 @@ def to_protein_coordinates(self) -> VariantRecordWithCoordinate:
end = math.ceil(self.location.end / 3)
start_offset = self.location.start - start * 3
end_offset = end * 3 - self.location.end
return seqvar.VariantRecordWithCoordinate(
return VariantRecordWithCoordinate(
variant=self.variant,
location=FeatureLocation(
start=start, end=end, seqname=self.location.seqname,
Expand Down
4 changes: 1 addition & 3 deletions moPepGen/seqvar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
""" Module for sequence variant """
from moPepGen.seqvar.VariantRecord import VariantRecord, \
SINGLE_NUCLEOTIDE_SUBSTITUTION, ALTERNATIVE_SPLICING_TYPES, \
CODON_REASSIGNMENTS_TYPES, SEC_TERMINATION_TYPE
from moPepGen.seqvar.VariantRecord import VariantRecord
from moPepGen.seqvar.VariantRecordWithCoordinate import \
VariantRecordWithCoordinate
from moPepGen.seqvar import io
Expand Down
6 changes: 3 additions & 3 deletions moPepGen/seqvar/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from typing import Dict, Iterable, Tuple, Union, IO
from pathlib import Path
import tempfile
from moPepGen import GVF_HEADER
from moPepGen import GVF_HEADER, constant
from moPepGen.seqvar.GVFMetadata import GVFMetadata
from moPepGen.seqvar.VariantRecord import VariantRecord, ATTRS_POSITION
from moPepGen.seqvar.VariantRecord import VariantRecord
from moPepGen.SeqFeature import FeatureLocation


Expand Down Expand Up @@ -55,7 +55,7 @@ def parse_attrs(info:str) -> Dict[str, Union[str,int]]:
for field in info.split(';'):
key, val = field.split('=')
val = val.strip('"')
if key in ATTRS_POSITION:
if key in constant.ATTRS_POSITION:
val = str(int(val) - 1)
attrs[key] = val
return attrs
Expand Down
Loading

0 comments on commit edf1aea

Please sign in to comment.