Skip to content

Commit

Permalink
removed sortedlist
Browse files Browse the repository at this point in the history
typing fixes
  • Loading branch information
adraismawur committed Aug 22, 2023
1 parent f281be9 commit e0d5366
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 35 deletions.
38 changes: 16 additions & 22 deletions bigscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def load_data(run: RunParameters):
profiler = Profiler(run.output.profile_path)
profiler.start()

if run.output.db_path.exists():
if False:
logging.info("Loading existing run from disk...")

DB.load_from_disk(run.output.db_path)
Expand All @@ -60,9 +60,6 @@ def load_data(run: RunParameters):

for gbk in gbks:
HSP.load_all(gbk.genes)
for cds in gbk.genes:
# TODO: remove once we get rid of sortedlists
cds.lock()

HMMer.init(run.input.pfam_path)

Expand Down Expand Up @@ -110,38 +107,33 @@ def callback(tasks_done):

if platform.system() == "Darwin":
logging.warning("Running on mac-OS: hmmsearch_simple single threaded")
all_hsps = list(HMMer.hmmsearch_simple(all_cds, 1))
HMMer.hmmsearch_simple(all_cds, 1)
else:
logging.debug(
"Running on %s: hmmsearch_multiprocess with %d cores",
platform.system(),
run.cores,
)

# if legacy is true, set cutoff to 1.1 for the domain filtering so we can use
# legacy filtering later

if run.legacy:
domain_overlap_cutoff = 1.1
else:
domain_overlap_cutoff = run.hmmer.domain_overlap_cutoff
# TODO: the overlap filtering in this function does not seem to work
HMMer.hmmsearch_multiprocess(
all_cds,
domain_overlap_cutoff=domain_overlap_cutoff,
domain_overlap_cutoff=run.hmmer.domain_overlap_cutoff,
cores=run.cores,
)

# TODO: move, or remove after the add_hsp_overlap function is fixed (if it is broken
# in the first place)
for cds in all_cds:
if run.legacy:
cds.hsps = legacy_filter_overlap(cds.hsps, 0.1)

# TODO: remove when sortedlists are removed
# this converts the sortedlist used internally to regular lists.
# for some reason, doing this beforehand really messes things up for reasons I don't
# understand.
cds.lock()
# this sorts all CDS and then filters them using the old filtering system, which
# is less efficient than the filtering using the CDS.add_hsp_overlap_filter
# method. however, that method seems to be broken somehow
all_hsps = []
for gbk in gbks:
for cds in gbk.genes:
cds.hsps = sorted(cds.hsps)
all_hsps.extend(
[hsp.domain for hsp in legacy_filter_overlap(cds.hsps, 0.1)]
)

all_hsps = []
for cds in all_cds:
Expand Down Expand Up @@ -171,6 +163,8 @@ def callback(tasks_done):
all_alignments = list()
for cds in all_cds:
for hsp in cds.hsps:
if hsp.alignment is None:
continue
all_alignments.append(hsp.alignment)

logging.info("%d alignments", len(all_alignments))
Expand Down
13 changes: 4 additions & 9 deletions src/genbank/cds.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

# from dependencies
from Bio.SeqFeature import SeqFeature
from sortedcontainers import SortedList

# from other modules
from src.errors import InvalidGBKError
Expand All @@ -32,7 +31,7 @@ class CDS:
gene_kind: str
strand: Bool
aa_seq: SeqRecord.seq
hsps: SortedList[HSP]
hsps: list[HSP]
"""

def __init__(self, nt_start: int, nt_stop: int):
Expand All @@ -43,7 +42,7 @@ def __init__(self, nt_start: int, nt_stop: int):
self.gene_kind: Optional[str] = None
self.strand: Optional[int] = None
self.aa_seq: str = ""
self.hsps: SortedList[HSP] | list[HSP] = SortedList()
self.hsps: list[HSP] = []
self.__locked = False

# db specific fields
Expand All @@ -65,13 +64,9 @@ def add_hsp_overlap_filter(self, new_hsp: HSP, domain_overlap_cutoff=0.1) -> Non
0.1
"""
# TODO: remove once sorted lists are gone entirely
if self.__locked or isinstance(self.hsps, list):
raise AttributeError("Cannot add HSPs to a locked CDS")

# if no hsps added yet, just add and continue
if len(self.hsps) == 0:
self.hsps.add(new_hsp)
self.hsps.append(new_hsp)
return

delete_list = []
Expand Down Expand Up @@ -117,7 +112,7 @@ def add_hsp_overlap_filter(self, new_hsp: HSP, domain_overlap_cutoff=0.1) -> Non

# if we got through all of that without returning, we never replaced an HSP,
# so add the new one here
self.hsps.add(new_hsp)
self.hsps.append(new_hsp)

def save(self, commit=True):
"""Saves this CDS to the database and optionally executes a commit
Expand Down
5 changes: 1 addition & 4 deletions src/hmm/hsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,7 @@ def load_all(cds_list: list[CDS]) -> None:
)
new_hsp.alignment = HSPAlignment(new_hsp, result.alignment)

if isinstance(cds.hsps, list):
raise ValueError("HSP list of CDS is a list. Did the CDS get locked?")

cds.hsps.add(new_hsp)
cds.hsps.append(new_hsp)

@staticmethod
def has_overlap(hsp_a: HSP, hsp_b: HSP) -> bool:
Expand Down

0 comments on commit e0d5366

Please sign in to comment.