diff --git a/tests/paired.bam b/tests/paired.bam new file mode 100644 index 00000000..bbd354bb Binary files /dev/null and b/tests/paired.bam differ diff --git a/tests/paired.bam.bai b/tests/paired.bam.bai new file mode 100644 index 00000000..fd9ba3c4 Binary files /dev/null and b/tests/paired.bam.bai differ diff --git a/tests/paired_group_discard_chimeras.sam b/tests/paired_group_discard_chimeras.sam new file mode 100644 index 00000000..192b13e1 --- /dev/null +++ b/tests/paired_group_discard_chimeras.sam @@ -0,0 +1,8729 @@ +@HD VN:1.0 SO:coordinate +NS500105:308:HCGGHAFXY:1:11101:11142:17827_ACTGTGACCATCGTTC 355 chr1 18288 1 1S67M = 195312 177101 TCCTTGATCTTCTCAATCTTGGCCTGGGCCAAGGAGACCTTCTCTCCAATGGCCTGCACCTGGCTCCG EAEEEE/EEEEEEEEEEEEEEE 0: + processor.positions) + if processor.positions > 0: U.info("Mean number of unique UMIs per position: %.2f" % - (float(processor.UMIClusterer.total_umis_per_position) / - processor.UMIClusterer.positions)) + (float(processor.total_umis_per_position) / + processor.positions)) U.info("Max. number of unique UMIs per position: %i" % - processor.UMIClusterer.max_umis_per_position) + processor.max_umis_per_position) else: U.warn("The BAM did not contain any valid " "reads/read pairs for deduplication") diff --git a/umi_tools/umi_methods.py b/umi_tools/umi_methods.py index 237766cc..f45c0b1d 100644 --- a/umi_tools/umi_methods.py +++ b/umi_tools/umi_methods.py @@ -1379,6 +1379,28 @@ def __call__(self, inreads): else: self.read_events['Input Reads'] += 1 + # only ever dealing with read1s from here + + if self.options.paired: + if read.is_paired: + self.read_events['Read pairs'] += 1 + else: + self.read_events['Unpaired reads'] += 1 + + # if paired end input and read1 is unpaired... + + # skip, or + if self.options.unpaired_reads == "discard": + continue + + # yield without grouping, or + elif self.options.unpaired_reads == "output": + yield read, None, "single_read" + + # Use read pair; TLEN will be 0 + elif self.options.unpaired_reads == "use": + pass + if read.is_unmapped: if self.options.paired: if read.mate_is_unmapped: @@ -1388,20 +1410,41 @@ def __call__(self, inreads): else: self.read_events['Single end unmapped'] += 1 + # if read1 is unmapped, yield immediately or skip read if self.return_unmapped: self.read_events['Input Reads'] += 1 yield read, None, "single_read" continue - if read.mate_is_unmapped and self.options.paired: + if self.options.paired and read.mate_is_unmapped: if not read.is_unmapped: self.read_events['Read 2 unmapped'] += 1 - if self.return_unmapped: + + # if paired end input and read2 is unmapped, skip unless + # options.unmapped_reads == "use", in which case TLEN will be 0 + if self.options.unmapped_reads != "use": + if self.return_unmapped: + yield read, None, "single_read" + continue + + if read.is_paired and ( + read.reference_name != read.next_reference_name): + self.read_events['Chimeric read pair'] += 1 + + # if paired end input and read2 is mapped to another contig... + + # skip, or + if self.options.chimeric_pairs == "discard": + continue + + # yield without grouping, or + elif self.options.chimeric_pairs == "output": yield read, None, "single_read" - continue + continue - if self.options.paired: - self.read_events['Paired Reads'] += 1 + # Use read pair; TLEN will be 0 + elif self.options.chimeric_pairs == "use": + pass if self.options.subset: if random.random() >= self.options.subset: