From 877c35f646d0cafa699ec759e022f0e8cae94873 Mon Sep 17 00:00:00 2001 From: Andrey Prjibelski Date: Tue, 7 May 2024 11:22:07 +0300 Subject: [PATCH] do not forget some of the unassigned reads --- src/graph_based_model_construction.py | 7 +++++-- src/transcript_printer.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/graph_based_model_construction.py b/src/graph_based_model_construction.py index e600eada..83ea48ed 100644 --- a/src/graph_based_model_construction.py +++ b/src/graph_based_model_construction.py @@ -146,7 +146,7 @@ def process(self, read_assignment_storage): self.forward_counts() # FIXME: remove asserts below - if self.transcript_model_storage and len(set([x.read_id for x in read_assignment_storage])) != len(self.read_assignment_counts): + if len(set([x.read_id for x in read_assignment_storage])) != len(self.read_assignment_counts): logger.warning("Some reads were not assigned %d %d" % (len(set([x.read_id for x in read_assignment_storage])), len(self.read_assignment_counts))) # FIXME: remove asserts below if any(value < 0 for value in self.read_assignment_counts.values()): @@ -706,6 +706,9 @@ def transcript_from_reference(self, isoform_id): # assign reads back to constructed isoforms def assign_reads_to_models(self, read_assignments): if not self.transcript_model_storage: + for assignment in read_assignments: + read_id = assignment.read_id + self.read_assignment_counts[read_id] = 0 logger.debug("No transcripts were assigned") return @@ -721,7 +724,7 @@ def assign_reads_to_models(self, read_assignments): continue read_exons = assignment.corrected_exons - #logger.debug("# Checking read %s: %s" % (assignment.read_id, str(read_exons))) + # logger.debug("# Checking read %s: %s" % (assignment.read_id, str(read_exons))) model_combined_profile = profile_constructor.construct_profiles(read_exons, assignment.polya_info, []) model_assignment = assigner.assign_to_isoform(assignment.read_id, model_combined_profile) model_assignment.read_group = assignment.read_group diff --git a/src/transcript_printer.py b/src/transcript_printer.py index c566f49e..fa8b7beb 100644 --- a/src/transcript_printer.py +++ b/src/transcript_printer.py @@ -138,7 +138,8 @@ def dump_read_assignments(self, transcript_model_constructor): # write read_id -> transcript_id map if not self.output_r2t: return - for model_id, read_assignments in transcript_model_constructor.transcript_read_ids.items(): + for model_id in transcript_model_constructor.transcript_read_ids.keys(): + read_assignments = transcript_model_constructor.transcript_read_ids[model_id] for a in read_assignments: self.out_r2t.write("%s\t%s\n" % (a.read_id, model_id)) for read_id in transcript_model_constructor.read_assignment_counts.keys():