Skip to content

Commit

Permalink
fix hp_dist
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Nov 27, 2024
1 parent dc95795 commit 1ffe683
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 23 deletions.
8 changes: 4 additions & 4 deletions src/fraguracy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ impl Counts {
a.pos() as u32,
a.cigar().end_pos() as u32,
if a.is_reverse() { -1 } else { 1 },
);
) + MAX_HP_DIST;

/* read1/2, F/R, pos, mq, bq, ctx, hp_dist */
let mut a_index = [
Expand All @@ -442,7 +442,7 @@ impl Counts {
aq as usize,
// NOTE that this could be an error so we might change this later if we learn a_base is an error
Counts::base_to_ctx2(a_base),
(a_hp_dist + MAX_HP_DIST) as usize,
a_hp_dist as usize,
];

let b_hp_dist = hp::hp_distance(
Expand All @@ -451,7 +451,7 @@ impl Counts {
b.pos() as u32,
b.cigar().end_pos() as u32,
if b.is_reverse() { -1 } else { 1 },
);
) + MAX_HP_DIST;

let mut b_index = [
1 - b.is_first_in_template() as usize,
Expand All @@ -460,7 +460,7 @@ impl Counts {
bq as usize,
// NOTE that this could be an error so we might change this later if we learn b_base is an error
Counts::base_to_ctx2(b_base),
(b_hp_dist + MAX_HP_DIST) as usize,
b_hp_dist as usize,
];

if a_base == b_base {
Expand Down
55 changes: 36 additions & 19 deletions src/homopolymer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub(crate) fn find_homopolymers(seq: &[u8], re: &Regex) -> Lapper<u32, u8> {

/// return a negative number if the hp is before the position, accounting for strand.
/// and 0 if the hp contains the position, otherwise a positive number.
/// hphphp---pos---->
///
pub(crate) fn hp_distance(
hps: Option<&[&Interval<u32, u8>]>,
pos: u32,
Expand All @@ -30,27 +32,48 @@ pub(crate) fn hp_distance(
for hp in hps.map(|hps| hps.iter()).unwrap_or_default() {
// first we check if the hp is within 3 bases of the read start or stop.
// since this could truncate the hp and not affect the read.
if pos < hp.start + 3 && pos > hp.stop.max(3) - 3 {
// cases to exclude:
// read: ----------->
// hp: AAAAA
// pos:

if hp.stop >= read_start && hp.stop < read_start + 3 {
continue;
}
if hp.start < read_stop && hp.start > read_stop - 3 {
continue;
}

assert!(pos >= read_start && pos <= read_stop);

// now we check distance of pos to hp.
let mut d = (if pos < hp.start {
hp.start - pos
let d = if pos < hp.start {
(hp.start - pos) as i64
} else if pos > hp.stop {
pos - hp.stop
-((pos - hp.stop) as i64)
} else {
0
})
.min(crate::fraguracy::MAX_HP_DIST as u32) as i8;
0i64
};
// now we check distance of pos to hp.
let mut d = d.clamp(
-crate::fraguracy::MAX_HP_DIST as i64,
crate::fraguracy::MAX_HP_DIST as i64,
) as i8;
if strand == -1 {
d = -d;
}
if d < dist {
//dbg!(pos, hp.start, hp.stop, strand, d);
if d.abs() < dist.abs() {
dist = d;
}
}
/*
if dist != crate::fraguracy::MAX_HP_DIST {
eprintln!(
"pos: {}, read: {}-{}, strand: {}, dist: {}",
pos, read_start, read_stop, strand, dist
);
}
*/
dist
}

Expand Down Expand Up @@ -91,8 +114,8 @@ mod tests {
#[test]
fn test_hp_distance() {
let hp = vec![Interval {
start: 10,
stop: 13,
start: 9,
stop: 12,
val: 0,
}];
let hp_refs: Vec<&Interval<u32, u8>> = hp.iter().collect();
Expand All @@ -104,15 +127,9 @@ mod tests {
);

// Test forward strand
let hp = vec![Interval {
start: 10,
stop: 13,
val: 0,
}];
let hp_refs: Vec<&Interval<u32, u8>> = hp.iter().collect();
assert_eq!(hp_distance(Some(&hp_refs), 15, 5, 20, 1,), 2);
assert_eq!(hp_distance(Some(&hp_refs), 15, 5, 20, 1,), -3);

// Test reverse strand
assert_eq!(hp_distance(Some(&hp_refs), 15, 5, 20, -1,), -2);
assert_eq!(hp_distance(Some(&hp_refs), 15, 5, 20, -1,), 3);
}
}

0 comments on commit 1ffe683

Please sign in to comment.