Skip to content

Commit

Permalink
Merge pull request #41 from pangenome/min_trans_query_len
Browse files Browse the repository at this point in the history
`impg query/partition`: Add `min_interval_size` parameter for transitive queries
  • Loading branch information
AndreaGuarracino authored Jan 24, 2025
2 parents d41b472 + 241a3bc commit 82e84a0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 8 deletions.
7 changes: 5 additions & 2 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,8 @@ impl Impg {
range_start: i32,
range_end: i32,
masked_regions: Option<&FxHashMap<u32, SortedRanges>>,
max_depth: u16
max_depth: u16,
min_interval_size: u32,
) -> Vec<AdjustedInterval> {
let mut results = Vec::new();
// Add the input range to the results
Expand Down Expand Up @@ -445,7 +446,9 @@ impl Impg {

// Add non-overlapping portions to stack
for (new_start, new_end) in new_ranges {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
if (new_end - new_start).abs() as u32 >= min_interval_size {
stack.push((metadata.query_id, new_start, new_end, current_depth + 1));
}
}
}
}
Expand Down
21 changes: 16 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ enum Args {
/// Maximum recursion depth for transitive overlaps (0 for no limit).
#[clap(short = 'm', long, value_parser, default_value_t = 1)]
max_depth: u16,

/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,
},
/// Query overlaps in the alignment.
Query {
Expand All @@ -81,6 +85,10 @@ enum Args {
#[clap(short = 'm', long, value_parser, default_value_t = 1)]
max_depth: u16,

/// Minimum size of intervals to consider for transitive queries
#[clap(long, value_parser, default_value_t = 0)]
min_interval_size: u32,

/// Output results in PAF format.
#[clap(short = 'P', long, action)]
output_paf: bool,
Expand All @@ -107,9 +115,10 @@ fn main() -> io::Result<()> {
min_length,
merge_distance,
max_depth,
min_interval_size,
} => {
let impg = initialize_impg(&common)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, common.verbose > 1)?;
partition_alignments(&impg, window_size, &sequence_prefix, min_length, merge_distance, max_depth, min_interval_size, common.verbose > 1)?;
},
Args::Query {
common,
Expand All @@ -119,12 +128,13 @@ fn main() -> io::Result<()> {
output_paf,
check_intervals,
max_depth,
min_interval_size
} => {
let impg = initialize_impg(&common)?;

if let Some(target_range) = target_range {
let (target_name, target_range) = parse_target_range(&target_range)?;
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand All @@ -145,7 +155,7 @@ fn main() -> io::Result<()> {
let targets = parse_bed_file(&target_bed)?;
info!("Parsed {} target ranges from BED file", targets.len());
for (target_name, target_range, name) in targets {
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth);
let results = perform_query(&impg, &target_name, target_range, transitive, max_depth, min_interval_size);
if check_intervals {
let invalid_cigars = impg::impg::check_intervals(&impg, &results);
if !invalid_cigars.is_empty() {
Expand Down Expand Up @@ -305,7 +315,8 @@ fn perform_query(
target_name: &str,
target_range: (i32, i32),
transitive: bool,
max_depth: u16
max_depth: u16,
min_interval_size: u32
) -> Vec<AdjustedInterval> {
let (target_start, target_end) = target_range;
let target_id = impg.seq_index.get_id(target_name).expect("Target name not found in index");
Expand All @@ -314,7 +325,7 @@ fn perform_query(
panic!("Target range end ({}) exceeds the target sequence length ({})", target_end, target_length);
}
if transitive {
impg.query_transitive(target_id, target_start, target_end, None, max_depth)
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_interval_size)
} else {
impg.query(target_id, target_start, target_end)
}
Expand Down
3 changes: 2 additions & 1 deletion src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub fn partition_alignments(
min_length: usize,
merge_distance: usize,
max_depth: u16,
min_interval_size: u32,
debug: bool,
) -> io::Result<()> {
// Get all sequences with the given prefix
Expand Down Expand Up @@ -114,7 +115,7 @@ pub fn partition_alignments(

// Query overlaps for current window
//let query_start = Instant::now();
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth);
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_interval_size);
//let query_time = query_start.elapsed();
debug!(" Collected {} query overlaps", overlaps.len());

Expand Down

0 comments on commit 82e84a0

Please sign in to comment.