Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coverage stop coordinate switch to exclusive #204

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Change Log

## Version 0.1.6-dev

### Fixed
- Corrected coverage calculation to (1) remove an unnecessary left shift on start and (2) record the stop coordinate as exclusive in accordance with bedtools ([#204](https://github.com/qiyunzhu/woltka/pull/204)).

## Version 0.1.6 (2/22/2024)

### Changed
Expand Down
8 changes: 4 additions & 4 deletions woltka/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def parse_sam_file_ex(fh):
this, pool = qname, ([], [], [])

# append
pool[mate].append((rname, None, length, pos, pos + offset - 1))
pool[mate].append((rname, None, length, pos, pos + offset))

# final yield
if pool[0]:
Expand Down Expand Up @@ -590,7 +590,7 @@ def parse_sam_file_ex_ft(fh, excl):
pos = int(pos)
length, offset = cigar_to_lens(cigar)
pool[int(flag) >> 6 & 3].append((
rname, None, length, pos, pos + offset - 1))
rname, None, length, pos, pos + offset))

elif keep:
if rname in excl:
Expand All @@ -599,7 +599,7 @@ def parse_sam_file_ex_ft(fh, excl):
pos = int(pos)
length, offset = cigar_to_lens(cigar)
pool[int(flag) >> 6 & 3].append((
rname, None, length, pos, pos + offset - 1))
rname, None, length, pos, pos + offset))

if pool[0]:
yield this, pool[0]
Expand Down Expand Up @@ -718,7 +718,7 @@ def parse_sam_file_pd(fh, n=65536):
# # this is slow, because of function call
# chunk['length'], offset = zip(*chunk['cigar'].apply(
# cigar_to_lens))
# chunk['right'] = chunk['pos'] + offset - 1
# chunk['right'] = chunk['pos'] + offset
# # this is slow, because of function call
# # chunk['qname'] = chunk[['qname', 'flag']].apply(
# # qname_by_flag, axis=1)
Expand Down
2 changes: 1 addition & 1 deletion woltka/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def merge_ranges(ranges):
if cend is None:
# case 1: no active range, start active range
cstart, cend = start, end
elif cend >= start - 1:
elif cend >= start:
# case 2: active range continues through this range
# extend active range
cend = max(cend, end)
Expand Down
28 changes: 14 additions & 14 deletions woltka/tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,13 @@ def test_parse_sam_file_ex(self):
)
obs = list(parse_sam_file_ex(iter(sam)))
exp = [
('S1/1', [('NC_123456', None, 100, 26, 125)]),
('S1/2', [('NC_123456', None, 80, 151, 230)]),
('S2', [('NC_789012', None, 90, 186, 280)]),
('S3/1', [('NC_123456', None, 100, 452, 551),
('NC_345678', None, 100, 133, 232)]),
('S3/2', [('NC_123456', None, 95, 378, 477),
('NC_345678', None, 95, 261, 355)])
('S1/1', [('NC_123456', None, 100, 26, 126)]),
('S1/2', [('NC_123456', None, 80, 151, 231)]),
('S2', [('NC_789012', None, 90, 186, 281)]),
('S3/1', [('NC_123456', None, 100, 452, 552),
('NC_345678', None, 100, 133, 233)]),
('S3/2', [('NC_123456', None, 95, 378, 478),
('NC_345678', None, 95, 261, 356)])
]
self.assertEqual(len(obs), len(exp))
for o, e in zip(obs, exp):
Expand Down Expand Up @@ -375,13 +375,13 @@ def test_parse_sam_file_ex_ft(self):
)
obs = list(parse_sam_file_ex_ft(iter(sam), {'G1'}))
exp = [
('S2', [('G2', None, 50, 80, 129),
('G3', None, 50, 80, 129)]),
('S2/1', [('G3', None, 50, 80, 129)]),
('S2/2', [('G4', None, 50, 80, 129)]),
('S4', [('G6', None, 50, 80, 129)]),
('S4/1', [('G6', None, 50, 80, 129)]),
('S4/2', [('G6', None, 50, 80, 129)])
('S2', [('G2', None, 50, 80, 130),
('G3', None, 50, 80, 130)]),
('S2/1', [('G3', None, 50, 80, 130)]),
('S2/2', [('G4', None, 50, 80, 130)]),
('S4', [('G6', None, 50, 80, 130)]),
('S4/1', [('G6', None, 50, 80, 130)]),
('S4/2', [('G6', None, 50, 80, 130)])
]
self.assertEqual(len(obs), len(exp))
for o, e in zip(obs, exp):
Expand Down
12 changes: 6 additions & 6 deletions woltka/tests/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_merge_ranges(self):
self.assertListEqual(obs, exp)

# ranges that are connected (but not overlapped)
obs = merge_ranges([1, 2, 3, 4, 5, 6])
exp = [1, 6]
obs = merge_ranges([1, 2, 2, 3, 3, 4])
exp = [1, 4]
self.assertListEqual(obs, exp)

# empty range list
Expand Down Expand Up @@ -79,8 +79,8 @@ def test_parse_ranges(self):
('S2', 'G2'): [0, [26, 100, 151, 200]],
('S2', 'G3'): [0, [1, 50, 76, 125]],
('S2', 'G4'): [0, [26, 75, 101, 150]],
('S3', 'G1'): [0, [1, 200]],
('S3', 'G2'): [0, [1, 100]]}
('S3', 'G1'): [0, [1, 50, 51, 100, 101, 150, 151, 200]],
('S3', 'G2'): [0, [1, 50, 51, 100]]}
self.assertDictEqual(obs, exp)

def test_calc_coverage(self):
Expand All @@ -97,8 +97,8 @@ def test_calc_coverage(self):
'S2': {'G2': [26, 100, 151, 200],
'G3': [1, 50, 76, 125],
'G4': [26, 75, 101, 150]},
'S3': {'G1': [1, 200],
'G2': [1, 100]}}
'S3': {'G1': [1, 50, 51, 100, 101, 150, 151, 200],
'G2': [1, 50, 51, 100]}}
self.assertDictEqual(obs, exp)

def test_write_coverage(self):
Expand Down
6 changes: 3 additions & 3 deletions woltka/tests/test_ordinal.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,9 @@ def test_ordinal_parser_dummy(self):
'NC_123456': {0: 100, 1: 80},
'NC_789012': {2: 90}})
self.assertDictEqual(obs[2], {
'NC_123456': [(26, True, False, 0), (125, False, False, 0),
(151, True, False, 1), (230, False, False, 1)],
'NC_789012': [(186, True, False, 2), (280, False, False, 2)]})
'NC_123456': [(26, True, False, 0), (126, False, False, 0),
(151, True, False, 1), (231, False, False, 1)],
'NC_789012': [(186, True, False, 2), (281, False, False, 2)]})

def test_ordinal_mapper(self):
# uses the same example as above, with some noise
Expand Down
4 changes: 2 additions & 2 deletions woltka/tests/test_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def test_coverage(self):
with open(join(outcov_dir, 'S04.cov'), 'r') as f:
obs = f.read().splitlines()
self.assertEqual(len(obs), 1078)
self.assertEqual(obs[10], 'G000007265\t2092666\t2092815')
self.assertEqual(obs[200], 'G000215745\t768758\t769038')
self.assertEqual(obs[10], 'G000007265\t2092666\t2092816')
self.assertEqual(obs[200], 'G000215745\t768758\t769039')
remove(output_fp)
rmtree(outcov_dir)

Expand Down