Skip to content

Commit

Permalink
switch to simple id distributor as thread safety is not needed acrros…
Browse files Browse the repository at this point in the history
… differnt processes
  • Loading branch information
andrewprzh committed Sep 13, 2024
1 parent e3f3524 commit 7fb27d8
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 34 deletions.
19 changes: 7 additions & 12 deletions src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,13 @@
import os
import re
import subprocess
import threading
import math
from collections import defaultdict
from enum import Enum

logger = logging.getLogger('IsoQuant')


class AtomicIDDistributor(object):
def __init__(self):
self.value = 0
self._lock = threading.Lock()

def increment(self):
with self._lock:
self.value += 1
return self.value


class CigarEvent(Enum):
match = 0
insertion = 1
Expand All @@ -48,6 +36,13 @@ def get_ins_del_match_events(cls):
return {cls.match, cls.insertion, cls.deletion, cls.seq_match, cls.seq_mismatch}


class TranscriptNaming:
transcript_prefix = "transcript"
novel_gene_prefix = "novel_gene_"
nic_transcript_suffix = ".nic"
nnic_transcript_suffix = ".nnic"


# key, value
def get_first_best_from_sorted(sorted_list_of_pairs):
if not sorted_list_of_pairs:
Expand Down
7 changes: 3 additions & 4 deletions src/gene_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
equal_ranges,
get_intron_strand,
intervals_total_length,
is_subprofile,
overlaps,
junctions_from_blocks,
AtomicIDDistributor
junctions_from_blocks
)
from .id_policy import SimpleIDDistributor

logger = logging.getLogger('IsoQuant')

Expand Down Expand Up @@ -121,7 +120,7 @@ def print_debug(self):

# exon/intron info
class FeatureInfo:
feature_id_counter = AtomicIDDistributor()
feature_id_counter = SimpleIDDistributor()
def __init__(self, chr_id, start, end, strand, type, gene_ids):
self.id = FeatureInfo.feature_id_counter.increment()
self.chr_id = chr_id
Expand Down
19 changes: 8 additions & 11 deletions src/graph_based_model_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from enum import unique, Enum

from .common import (
TranscriptNaming,
cmp,
get_exons,
intersection_len,
Expand Down Expand Up @@ -46,10 +47,6 @@ class StrandnessReportingLevel(Enum):


class GraphBasedModelConstructor:
transcript_prefix = "transcript"
novel_gene_prefix = "novel_gene_"
nic_transcript_suffix = ".nic"
nnic_transcript_suffix = ".nnic"
detected_known_isoforms = set()
extended_transcript_ids = set()

Expand Down Expand Up @@ -411,7 +408,7 @@ def construct_fl_isoforms(self):
transcript_range = (path[0][1], path[-1][1])
novel_exons = get_exons(transcript_range, list(intron_path))
count = self.path_storage.paths[path]
new_transcript_id = self.transcript_prefix + str(self.get_transcript_id())
new_transcript_id = TranscriptNaming.transcript_prefix + str(self.get_transcript_id())
# logger.debug("uuu %s: %s" % (new_transcript_id, str(novel_exons)))

reference_isoform = None
Expand Down Expand Up @@ -482,17 +479,17 @@ def construct_fl_isoforms(self):

transcript_gene = self.select_reference_gene(intron_path, transcript_range, transcript_strand)
if transcript_gene is None:
transcript_gene = (GraphBasedModelConstructor.novel_gene_prefix + self.gene_info.chr_id +
transcript_gene = (TranscriptNaming.novel_gene_prefix + self.gene_info.chr_id +
"_" + str(self.get_transcript_id()))
elif transcript_strand == '.':
transcript_strand = self.gene_info.gene_strands[transcript_gene]

if all(intron in self.known_introns for intron in intron_path):
transcript_type = TranscriptModelType.novel_in_catalog
id_suffix = self.nic_transcript_suffix
id_suffix = TranscriptNaming.nic_transcript_suffix
else:
transcript_type = TranscriptModelType.novel_not_in_catalog
id_suffix = self.nnic_transcript_suffix
id_suffix = TranscriptNaming.nnic_transcript_suffix

new_model = TranscriptModel(self.gene_info.chr_id, transcript_strand,
new_transcript_id + ".%s" % self.gene_info.chr_id + id_suffix,
Expand Down Expand Up @@ -644,11 +641,11 @@ def generate_monoexon_from_clustered(self, clustered_reads, forward=True):

strand = '+' if forward else '-'
coordinates = (five_prime_pos, three_prime_pos) if forward else (three_prime_pos, five_prime_pos)
new_transcript_id = self.transcript_prefix + str(self.get_transcript_id())
transcript_gene = (GraphBasedModelConstructor.novel_gene_prefix + self.gene_info.chr_id +
new_transcript_id = TranscriptNaming.transcript_prefix + str(self.get_transcript_id())
transcript_gene = (TranscriptNaming.novel_gene_prefix + self.gene_info.chr_id +
"_" + str(self.get_transcript_id()))
transcript_type = TranscriptModelType.novel_not_in_catalog
id_suffix = self.nnic_transcript_suffix
id_suffix = TranscriptNaming.nnic_transcript_suffix

is_valid = True
half_len = interval_len(coordinates) / 2
Expand Down
21 changes: 16 additions & 5 deletions src/id_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# # All Rights Reserved
# See file LICENSE for details.
############################################################################
import threading


from .graph_based_model_construction import GraphBasedModelConstructor
from src.common import TranscriptNaming


class SimpleIDDistributor(object):
Expand All @@ -25,7 +25,7 @@ def __init__(self, genedb, chr_id):
return

for g in genedb.region(seqid=chr_id, start=1, featuretype="gene"):
if g.id.startswith(GraphBasedModelConstructor.novel_gene_prefix):
if g.id.startswith(TranscriptNaming.novel_gene_prefix):
try:
gene_num = int(g.id.split("_")[-1])
self.forbidden_ids.add(gene_num)
Expand All @@ -34,9 +34,9 @@ def __init__(self, genedb, chr_id):
except ValueError:
pass

transcript_num_start_pos = len(GraphBasedModelConstructor.transcript_prefix)
transcript_num_start_pos = len(TranscriptNaming.transcript_prefix)
for t in genedb.region(seqid=chr_id, start=1, featuretype=("transcript", "mRNA")):
if t.id.startswith(GraphBasedModelConstructor.transcript_prefix):
if t.id.startswith(TranscriptNaming.transcript_prefix):
try:
transcript_num = int(t.id.split(".")[0][transcript_num_start_pos:])
self.forbidden_ids.add(transcript_num)
Expand Down Expand Up @@ -78,3 +78,14 @@ def get_id(self, chr_id, feature, strand):
feature_id = self.id_dict[feature_tuple]

return feature_id


class AtomicIDDistributor(object):
def __init__(self):
self.value = 0
self._lock = threading.Lock()

def increment(self):
with self._lock:
self.value += 1
return self.value
5 changes: 3 additions & 2 deletions src/isoform_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import logging
from enum import Enum, unique
from src.common import AtomicIDDistributor, junctions_from_blocks
from src.common import junctions_from_blocks
from src.id_policy import SimpleIDDistributor
from src.serialization import *
from src.polya_finder import PolyAInfo

Expand Down Expand Up @@ -619,7 +620,7 @@ def serialize(self, outfile):


class ReadAssignment:
assignment_id_generator = AtomicIDDistributor()
assignment_id_generator = SimpleIDDistributor()

def __init__(self, read_id, assignment_type, match=None):
self.assignment_id = ReadAssignment.assignment_id_generator.increment()
Expand Down

0 comments on commit 7fb27d8

Please sign in to comment.