Skip to content

Commit

Permalink
Add min_distance_between_ranges parameter to query functions for bett…
Browse files Browse the repository at this point in the history
…er range management
  • Loading branch information
AndreaGuarracino committed Jan 24, 2025
1 parent 82e84a0 commit 8fe6097
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 13 deletions.
47 changes: 41 additions & 6 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ impl Impg {
masked_regions: Option<&FxHashMap<u32, SortedRanges>>,
max_depth: u16,
min_interval_size: u32,
min_distance_between_ranges: u32,
) -> Vec<AdjustedInterval> {
let mut results = Vec::new();
// Add the input range to the results
Expand Down Expand Up @@ -440,14 +441,48 @@ impl Impg {
// Only add non-overlapping portions to the stack for further exploration
if metadata.query_id != current_target_id {
let ranges = visited_ranges.entry(metadata.query_id)
.or_default(); // Note the closure here
.or_default();

let mut should_add = true;

if min_distance_between_ranges > 0 {
let (new_min, new_max) = if adjusted_query_start <= adjusted_query_end {
(adjusted_query_start, adjusted_query_end)
} else {
(adjusted_query_end, adjusted_query_start)
};

// Find insertion point in sorted ranges
let idx = match ranges.ranges.binary_search_by_key(&new_min, |&(start, _)| start) {
Ok(i) => i,
Err(i) => i,
};

// Only need to check adjacent ranges due to sorting
if idx > 0 {
// Check previous range
let (_, prev_end) = ranges.ranges[idx - 1];
if ((new_min - prev_end).abs() as u32) < min_distance_between_ranges {
should_add = false;
}
}
if should_add && idx < ranges.ranges.len() {
// Check next range
let (next_start, _) = ranges.ranges[idx];
if ((next_start - new_max).abs() as u32) < min_distance_between_ranges {
should_add = false;
}
}
}

let new_ranges = ranges.insert((adjusted_query_start, adjusted_query_end));
if should_add {
let new_ranges = ranges.insert((adjusted_query_start, adjusted_query_end));

// Add non-overlapping portions to stack
for (new_start, new_end) in new_ranges {
if (new_end - new_start).abs() as u32 >= min_interval_size {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
// Add non-overlapping portions to stack
for (new_start, new_end) in new_ranges {
if ((new_end - new_start).abs() as u32) >= min_interval_size {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
}
}
}
}
Expand Down
23 changes: 17 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ enum Args {
/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,

/// Minimum distance between transitive ranges to consider on the same sequence
#[clap(long, value_parser, default_value_t = 0)]
min_distance_between_ranges: u32,
},
/// Query overlaps in the alignment.
Query {
Expand All @@ -88,6 +92,10 @@ enum Args {
/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,

/// Minimum distance between transitive ranges to consider on the same sequence
#[clap(long, value_parser, default_value_t = 0)]
min_distance_between_ranges: u32,

/// Output results in PAF format.
#[clap(short = 'P', long, action)]
Expand Down Expand Up @@ -116,9 +124,10 @@ fn main() -> io::Result<()> {
merge_distance,
max_depth,
min_interval_size,
min_distance_between_ranges,
} => {
let impg = initialize_impg(&common)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, min_interval_size, common.verbose > 1)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, min_interval_size, min_distance_between_ranges, common.verbose > 1)?;
},
Args::Query {
common,
Expand All @@ -128,13 +137,14 @@ fn main() -> io::Result<()> {
output_paf,
check_intervals,
max_depth,
min_interval_size
min_interval_size,
min_distance_between_ranges
} => {
let impg = initialize_impg(&common)?;

if let Some(target_range) = target_range {
let (target_name, target_range) = parse_target_range(&target_range)?;
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size, min_distance_between_ranges);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand All @@ -155,7 +165,7 @@ fn main() -> io::Result<()> {
let targets = parse_bed_file(&target_bed)?;
info!("Parsed {} target ranges from BED file", targets.len());
for (target_name, target_range, name) in targets {
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size, min_distance_between_ranges);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand Down Expand Up @@ -316,7 +326,8 @@ fn perform_query(
target_range: (i32, i32),
transitive: bool,
max_depth: u16,
min_interval_size: u32
min_interval_size: u32,
min_distance_between_ranges: u32
) -> Vec<AdjustedInterval> {
let (target_start, target_end) = target_range;
let target_id = impg.seq_index.get_id(target_name).expect("Target name not found in index");
Expand All @@ -325,7 +336,7 @@ fn perform_query(
panic!("Target range end ({}) exceeds the target sequence length ({})", target_end, target_length);
}
if transitive {
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_interval_size)
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_interval_size, min_distance_between_ranges)
} else {
impg.query(target_id, target_start, target_end)
}
Expand Down
3 changes: 2 additions & 1 deletion src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub fn partition_alignments(
merge_distance: usize,
max_depth: u16,
min_interval_size: u32,
min_distance_between_ranges: u32,
debug: bool,
) -> io::Result<()> {
// Get all sequences with the given prefix
Expand Down Expand Up @@ -115,7 +116,7 @@ pub fn partition_alignments(

// Query overlaps for current window
//let query_start = Instant::now();
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_interval_size);
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_interval_size, min_distance_between_ranges);
//let query_time = query_start.elapsed();
debug!(" Collected {} query overlaps", overlaps.len());

Expand Down

0 comments on commit 8fe6097

Please sign in to comment.