Skip to content

Commit

Permalink
Merge pull request #7 from bkille/remove-path-from-index
Browse files — browse the repository at this point in the history
Remove path from index
  • Loading branch information
ekg authored Apr 24, 2024
2 parents ac5088a + 87af5e1 commit 6b15fa3
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust_build_test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Rust CI

on: [push]
on: [push, pull_request]

jobs:
build_and_test:
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "impg"
version = "0.1.0"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
10 changes: 5 additions & 5 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ impl QueryMetadata {

pub type QueryInterval = (Interval<u32>, Vec<CigarOp>);
type TreeMap = HashMap<u32, BasicCOITree<QueryMetadata, u32>>;
pub type SerializableImpg = (HashMap<u32, Vec<SerializableInterval>>, SequenceIndex, String);
pub type SerializableImpg = (HashMap<u32, Vec<SerializableInterval>>, SequenceIndex);

#[derive(Clone, Serialize, Deserialize)]
pub struct SerializableInterval {
Expand Down Expand Up @@ -184,11 +184,11 @@ impl Impg {
}).collect();
(*target_id, intervals)
}).collect();
(serializable_trees, self.seq_index.clone(), self.paf_file.clone())
(serializable_trees, self.seq_index.clone())
}

pub fn from_serializable(serializable: SerializableImpg) -> Self {
let (serializable_trees, seq_index, paf_file) = serializable;
pub fn from_paf_and_serializable(paf_file: &str, serializable: SerializableImpg) -> Self {
let (serializable_trees, seq_index) = serializable;
let paf_gzi_index: Option<bgzf::gzi::Index> = if [".gz", ".bgz"].iter().any(|e| paf_file.ends_with(e)) {
let paf_gzi_file = paf_file.to_owned() + ".gzi";
Some(bgzf::gzi::read(paf_gzi_file.clone()).expect(format!("Could not open {}", paf_gzi_file).as_str()))
Expand All @@ -203,7 +203,7 @@ impl Impg {
}).collect::<Vec<_>>().as_slice());
(target_id, tree)
}).collect();
Self { trees, seq_index, paf_file, paf_gzi_index }
Self { trees, seq_index, paf_file: paf_file.to_string(), paf_gzi_index }
}

pub fn query(&self, target_id: u32, range_start: i32, range_end: i32) -> Vec<QueryInterval> {
Expand Down
43 changes: 17 additions & 26 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ struct Args {
#[clap(short='p', long, value_parser)]
paf_file: Option<String>,

/// Path to the index file. Use this to specify a custom index file or to force the use of an index.
#[clap(short='i', long, value_parser)]
index_file: Option<String>,

/// Force the regeneration of the index, even if it already exists.
#[clap(short='I', long, action)]
force_reindex: bool,
Expand Down Expand Up @@ -57,12 +53,9 @@ fn main() -> io::Result<()> {
ThreadPoolBuilder::new().num_threads(args.num_threads.into()).build_global().unwrap();

let impg = match args {
Args { paf_file: Some(paf), index_file: None, force_reindex: false, .. } => load_or_generate_index(&paf, None, args.num_threads)?,
Args { paf_file: Some(paf), index_file: None, force_reindex: true, .. } => generate_index(&paf, None, args.num_threads)?,
Args { paf_file: Some(paf), index_file: Some(index), force_reindex: false, .. } => load_or_generate_index(&paf, Some(&index), args.num_threads)?,
Args { paf_file: Some(paf), index_file: Some(index), force_reindex: true, .. } => generate_index(&paf, Some(&index), args.num_threads)?,
Args { paf_file: None, index_file: Some(index), .. } => load_index(&index)?,
_ => return Err(io::Error::new(io::ErrorKind::InvalidInput, "Either a PAF file or an index file must be provided")),
Args { paf_file: Some(paf), force_reindex: false, .. } => load_or_generate_index(&paf, args.num_threads)?,
Args { paf_file: Some(paf), force_reindex: true, .. } => generate_index(&paf, args.num_threads)?,
_ => return Err(io::Error::new(io::ErrorKind::InvalidInput, "A PAF file must be provided")),
};

if args.stats {
Expand Down Expand Up @@ -118,18 +111,16 @@ fn parse_bed_file(bed_file: &str) -> io::Result<Vec<(String, (i32, i32), Option<
}


fn load_or_generate_index(paf_file: &str, index_file: Option<&str>, num_threads: NonZeroUsize) -> io::Result<Impg> {
let index_file = index_file.map(|s| s.to_string());
let index_file = index_file.unwrap_or_else(|| format!("{}.impg", paf_file));
let index_file = index_file.as_str();
if std::path::Path::new(index_file).exists() {
load_index(index_file)
fn load_or_generate_index(paf_file: &str, num_threads: NonZeroUsize) -> io::Result<Impg> {
let index_file = format!("{}.impg", paf_file);
if std::path::Path::new(&index_file).exists() {
load_index(paf_file)
} else {
generate_index(paf_file, Some(index_file), num_threads)
generate_index(paf_file, num_threads)
}
}

fn generate_index(paf_file: &str, index_file: Option<&str>, num_threads: NonZeroUsize) -> io::Result<Impg> {
fn generate_index(paf_file: &str, num_threads: NonZeroUsize) -> io::Result<Impg> {
let file = File::open(paf_file)?;
let reader: Box<dyn io::Read> = if [".gz", ".bgz"].iter().any(|e| paf_file.ends_with(e)) {
Box::new(bgzf::MultithreadedReader::with_worker_count(num_threads, file))
Expand All @@ -140,21 +131,21 @@ fn generate_index(paf_file: &str, index_file: Option<&str>, num_threads: NonZero
let records = paf::parse_paf(reader).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Failed to parse PAF records: {:?}", e)))?;
let impg = Impg::from_paf_records(&records, paf_file).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Failed to create index: {:?}", e)))?;

if let Some(index_file) = index_file {
let serializable = impg.to_serializable();
let file = File::create(index_file)?;
let writer = BufWriter::new(file);
bincode::serialize_into(writer, &serializable).map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to serialize index: {:?}", e)))?;
}
let index_file = format!("{}.impg", paf_file);
let serializable = impg.to_serializable();
let file = File::create(index_file)?;
let writer = BufWriter::new(file);
bincode::serialize_into(writer, &serializable).map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to serialize index: {:?}", e)))?;

Ok(impg)
}

fn load_index(index_file: &str) -> io::Result<Impg> {
fn load_index(paf_file: &str) -> io::Result<Impg> {
let index_file = format!("{}.impg", paf_file);
let file = File::open(index_file)?;
let reader = BufReader::new(file);
let serializable: SerializableImpg = bincode::deserialize_from(reader).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Failed to deserialize index: {:?}", e)))?;
Ok(Impg::from_serializable(serializable))
Ok(Impg::from_paf_and_serializable(paf_file, serializable))
}

fn parse_target_range(target_range: &str) -> io::Result<(String, (i32, i32))> {
Expand Down

0 comments on commit 6b15fa3

Please sign in to comment.