diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 000000000..f04323123 --- /dev/null +++ b/.cargo/config @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.x86_64-unknown-linux-musl] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] +linker = "x86_64-linux-musl-gcc" \ No newline at end of file diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e4b4ebb96..588cf5a1a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -39,10 +39,6 @@ jobs: override: true components: clippy - - name: Install system dependencies - run: | - sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools - - name: Lint with clippy uses: actions-rs/clippy-check@v1 with: @@ -67,7 +63,7 @@ jobs: - name: Install system dependencies run: | - sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools + sudo apt-get install --yes zlib1g-dev libbz2-dev musl musl-dev musl-tools clang libc6-dev - name: Run cargo-tarpaulin uses: actions-rs/tarpaulin@v0.1 @@ -79,3 +75,17 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} path-to-lcov: ./lcov.info + + - name: Test musl build without default features + uses: actions-rs/cargo@v1 + with: + use-cross: true + command: build + args: --target x86_64-unknown-linux-musl --no-default-features + + - name: Test musl build with all features + uses: actions-rs/cargo@v1 + with: + use-cross: true + command: build + args: --target x86_64-unknown-linux-musl --all-features diff --git a/.gitignore b/.gitignore index 87e91fd70..e3a43dca2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .vscode/ +.idea *~ target Cargo.lock diff --git a/.gitmodules b/.gitmodules index d749293d0..2a109d59d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "htslib"] path = hts-sys/htslib - url = https://github.com/rust-bio/htslib.git + url = https://github.com/samtools/htslib.git diff --git a/Cargo.toml b/Cargo.toml index 127a7f133..1673387e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,9 +33,11 @@ snafu = ">= 0.5.0, <= 0.6.0" hts-sys = { version = "^1.9", path = "hts-sys", default-features = false } [features] -default = ["bzip2", "lzma"] +default = ["bzip2", "lzma", "curl"] bzip2 = ["hts-sys/bzip2"] lzma = ["hts-sys/lzma"] +curl = ["hts-sys/curl"] +openssl = ["hts-sys/openssl"] serde = ["serde_base", "serde_bytes"] [dev-dependencies] diff --git a/Cross.toml b/Cross.toml new file mode 100644 index 000000000..c753b089b --- /dev/null +++ b/Cross.toml @@ -0,0 +1,4 @@ +[target.x86_64-unknown-linux-musl] +image = "brainstorm/rust_musl_docker:stable-latest-libcurl" +[target.x86_64-unknown-linux-gnu] +image = "brainstorm/cross-x86_64-unknown-linux-gnu:libcurl-openssl" diff --git a/README.md b/README.md index d22643302..9a784beff 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,18 @@ If you only want to use the library, there is no need to clone the repository. G ## Requirements -To compile this crate you need the development headers of zlib, bzip2 and xz. +To compile this crate you need the development headers of zlib, bzip2 and xz. For instance, in Debian systems one needs the following dependencies: + +```shell +$ sudo apt-get install zlib1g-dev libbz2-dev liblzma-dev clang +``` + +On OSX, this will take a significant amount of time due to musl cross compiling toolchain: + +```shell +$ brew install FiloSottile/musl-cross/musl-cross +$ brew install bzip2 zlib xz curl-openssl +``` ## Usage diff --git a/docker/Dockerfile.gnu b/docker/Dockerfile.gnu new file mode 100644 index 000000000..2ec9fafe8 --- /dev/null +++ b/docker/Dockerfile.gnu @@ -0,0 +1,8 @@ +FROM rustembedded/cross:x86_64-unknown-linux-gnu + +ENV LIBCLANG_PATH /usr/lib/x86_64-linux-gnu +ENV LLVM_CONFIG_PATH /usr/bin +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev zlib1g-dev libbz2-dev liblzma-dev clang-8 && \ + ln -sf /usr/bin/llvm-config-8 /usr/bin/llvm-config && \ + ln -sf /usr/lib/x86_64-linux-gnu/libclang-8.so.1 /usr/lib/x86_64-linux-gnu/libclang.so.1 diff --git a/docker/Dockerfile.musl b/docker/Dockerfile.musl new file mode 100644 index 000000000..55b228447 --- /dev/null +++ b/docker/Dockerfile.musl @@ -0,0 +1,7 @@ +FROM rustembedded/cross:x86_64-unknown-linux-musl + +ENV PKG_CONFIG_ALLOW_CROSS 1 +ENV OPENSSL_LIB_DIR /usr/lib/x86_64-linux-gnu +ENV OPENSSL_INCLUDE_DIR /usr/include/openssl +RUN apt-get update && \ + apt-get install -y libssl-dev libcurl4-openssl-dev zlib1g-dev libbz2-dev liblzma-dev musl musl-dev musl-tools linux-libc-dev linux-headers-4.15.0-20-generic diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..349b54d58 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,17 @@ +# cross rustembedded containers + +Allows to compile (rust-)htslib in a variety of environments and architectures via [rustembedded cross](https://github.com/rust-embedded/cross). + +## Quickstart + +```shell +$ cd docker +$ docker build -t brainstorm/cross-x86_64-unknown-linux-musl:libcurl-openssl . -f Dockerfile.musl +$ docker build -t brainstorm/cross-x86_64-unknown-linux-gnu:libcurl-openssl . -f Dockerfile.gnu +``` + +Then to build and test rust-htslib with the above containers, proceed as you would with `cargo`, using `cross` instead, i.e: + +```shell +$ cross build --target x86_64-unknown-linux-musl +``` diff --git a/hts-sys/Cargo.toml b/hts-sys/Cargo.toml index 2fd254089..d3ff47d6d 100644 --- a/hts-sys/Cargo.toml +++ b/hts-sys/Cargo.toml @@ -8,7 +8,7 @@ description = "This library provides HTSlib bindings." readme = "README.md" keywords = ["htslib", "bam", "bioinformatics", "pileup", "sequencing"] license = "MIT" -repository = "https://github.com/rust-bio/rust-htslib.git" +repository = "https://github.com/samtools/htslib.git" documentation = "https://docs.rs/rust-htslib" edition = "2018" @@ -21,14 +21,18 @@ libc = "0.2" libz-sys = "1.0" bzip2-sys = { version = "0.1", optional = true } lzma-sys = { version = "0.1", optional = true } +curl-sys = { version = "0.4.26", optional = true } +openssl-sys = { version = "0.9.54", optional = true } [features] -default = ["bzip2", "lzma"] +default = ["bzip2", "lzma", "curl"] bzip2 = ["bzip2-sys"] lzma = ["lzma-sys"] +openssl = ["openssl-sys"] +curl = ["curl-sys"] [build-dependencies] fs-utils = "1.1" -bindgen = { version = "0.52.0", default-features = false, features = ["runtime"] } +bindgen = { version = "0.53.1", default-features = false, features = ["runtime"] } cc = "1.0" glob = "0.3.0" diff --git a/hts-sys/build.rs b/hts-sys/build.rs index 52f659d68..3891084e1 100644 --- a/hts-sys/build.rs +++ b/hts-sys/build.rs @@ -13,9 +13,9 @@ use std::fs; use std::path::PathBuf; use std::process::Command; -fn sed_htslib_makefile(out: &PathBuf, patterns: &Vec<&str>, feature: &str) { +fn sed_htslib_makefile(out: &PathBuf, patterns: &[&str], feature: &str) { for pattern in patterns { - if Command::new("sed") + if !Command::new("sed") .current_dir(out.join("htslib")) .arg("-i") .arg("-e") @@ -24,7 +24,6 @@ fn sed_htslib_makefile(out: &PathBuf, patterns: &Vec<&str>, feature: &str) { .status() .unwrap() .success() - != true { panic!("failed to strip {} support", feature); } @@ -63,10 +62,18 @@ fn main() { cfg.include(inc); } + let use_curl = env::var("CARGO_FEATURE_CURL").is_ok(); + if !use_curl { + let curl_patterns = vec!["s/ -lcurl//", "/#define HAVE_LIBCURL/d"]; + sed_htslib_makefile(&out, &curl_patterns, "curl"); + } else if let Ok(inc) = env::var("DEP_CURL_INCLUDE").map(PathBuf::from) { + cfg.include(inc); + } + let tool = cfg.get_compiler(); let (cc_path, cflags_env) = (tool.path(), tool.cflags_env()); let cc_cflags = cflags_env.to_string_lossy().replace("-O0", ""); - if Command::new("make") + if !Command::new("make") .current_dir(out.join("htslib")) .arg(format!("CC={}", cc_path.display())) .arg(format!("CFLAGS={}", cc_cflags)) @@ -75,7 +82,6 @@ fn main() { .status() .unwrap() .success() - != true { panic!("failed to build htslib"); } diff --git a/hts-sys/htslib b/hts-sys/htslib index d2c86b33b..fd0f89554 160000 --- a/hts-sys/htslib +++ b/hts-sys/htslib @@ -1 +1 @@ -Subproject commit d2c86b33b45ae8d43841ef1d137532c7be0f1a84 +Subproject commit fd0f89554459b78c07303e2c8a42acacd6851b46 diff --git a/hts-sys/src/lib.rs b/hts-sys/src/lib.rs index 5e614ee4f..de6108884 100644 --- a/hts-sys/src/lib.rs +++ b/hts-sys/src/lib.rs @@ -11,6 +11,8 @@ extern crate libz_sys; extern crate bzip2_sys; #[cfg(feature = "lzma")] extern crate lzma_sys; +#[cfg(feature = "curl")] +extern crate curl_sys; // include on-the-fly generated bindings include!(concat!(env!("OUT_DIR"), "/bindings.rs")); diff --git a/src/bam/buffer.rs b/src/bam/buffer.rs index 61d1e145e..f6ca6cd45 100644 --- a/src/bam/buffer.rs +++ b/src/bam/buffer.rs @@ -86,7 +86,7 @@ impl RecordBuffer { let to_remove = self .inner .iter() - .take_while(|rec| rec.pos() < window_start as i32) + .take_while(|rec| rec.pos() < window_start as i64) .count(); for _ in 0..to_remove { self.inner.pop_front(); @@ -111,7 +111,7 @@ impl RecordBuffer { record.cache_cigar(); } - if pos >= end as i32 { + if pos >= end as i64 { self.overflow = Some(record); break; } else { diff --git a/src/bam/ext.rs b/src/bam/ext.rs index 82919f7c2..5b9eb1e4b 100644 --- a/src/bam/ext.rs +++ b/src/bam/ext.rs @@ -21,7 +21,7 @@ pub trait BamRecordExtensions { /// this happens on insertions. /// /// pysam: blocks - fn aligned_blocks(&self) -> Vec<[i32; 2]>; + fn aligned_blocks(&self) -> Vec<[i64; 2]>; /// find intron positions (start, stop) /// @@ -29,14 +29,14 @@ pub trait BamRecordExtensions { /// and reports their positions. /// /// pysam: get_introns - fn introns(&self) -> Vec<[i32; 2]>; + fn introns(&self) -> Vec<[i64; 2]>; /// a list of aligned read and reference positions /// /// No entry for insertions, deletions or skipped pairs /// /// pysam: get_aligned_pairs(matches_only = True) - fn aligned_pairs(&self) -> Vec<[i32; 2]>; + fn aligned_pairs(&self) -> Vec<[i64; 2]>; /// a list of aligned read and reference positions /// @@ -44,7 +44,7 @@ pub trait BamRecordExtensions { /// for insertions, deletions or skipped pairs /// /// pysam: aligned_pairs(matches_only = False) - fn aligned_pairs_full(&self) -> Vec<[Option; 2]>; + fn aligned_pairs_full(&self) -> Vec<[Option; 2]>; /// the number of nucleotides covered by each Cigar:: possibility /// @@ -67,19 +67,19 @@ pub trait BamRecordExtensions { /// or unaligned positions within the read /// /// pysam: get_reference_positions(full_length=False) - fn reference_positions(&self) -> Vec; + fn reference_positions(&self) -> Vec; /// /// a Vec of reference positions that this read aligns to /// /// include soft-clipped or skipped positions as None /// /// pysam: get_reference_positions(full_length=True) - fn reference_positions_full(&self) -> Vec>; + fn reference_positions_full(&self) -> Vec>; /// left most aligned reference position of the read on the reference genome. - fn reference_start(&self) -> i32; + fn reference_start(&self) -> i64; /// right most aligned reference position of the read on the reference genome. - fn reference_end(&self) -> i32; + fn reference_end(&self) -> i64; /// infer the query length from the cigar string, optionally include hard clipped bases /// @@ -91,34 +91,34 @@ pub trait BamRecordExtensions { } impl BamRecordExtensions for bam::Record { - fn aligned_blocks(&self) -> Vec<[i32; 2]> { + fn aligned_blocks(&self) -> Vec<[i64; 2]> { let mut result = Vec::new(); let mut pos = self.pos(); for entry in self.cigar().iter() { match entry { Cigar::Match(len) => { - result.push([pos, pos + *len as i32]); - pos += *len as i32; + result.push([pos, pos + *len as i64]); + pos += *len as i64; } - Cigar::Del(len) => pos += *len as i32, - Cigar::RefSkip(len) => pos += *len as i32, + Cigar::Del(len) => pos += *len as i64, + Cigar::RefSkip(len) => pos += *len as i64, _ => (), } } result } - fn introns(&self) -> Vec<[i32; 2]> { + fn introns(&self) -> Vec<[i64; 2]> { let mut base_position = self.pos(); let mut result = Vec::new(); for entry in self.cigar().iter() { match entry { Cigar::Match(len) | Cigar::Del(len) | Cigar::Equal(len) | Cigar::Diff(len) => { - base_position += *len as i32 + base_position += *len as i64 } Cigar::RefSkip(len) => { let junc_start = base_position; - base_position += *len as i32; + base_position += *len as i64; result.push([junc_start, base_position]); } _ => {} @@ -127,25 +127,25 @@ impl BamRecordExtensions for bam::Record { result } - fn aligned_pairs(&self) -> Vec<[i32; 2]> { + fn aligned_pairs(&self) -> Vec<[i64; 2]> { let mut result = Vec::new(); - let mut pos: i32 = self.pos(); - let mut qpos: i32 = 0; + let mut pos: i64 = self.pos(); + let mut qpos: i64 = 0; for entry in self.cigar().iter() { match entry { Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => { - for i in pos..(pos + *len as i32) { + for i in pos..(pos + *len as i64) { result.push([qpos, i]); qpos += 1; } - pos += *len as i32; + pos += *len as i64; } Cigar::Ins(len) | Cigar::SoftClip(len) => { - qpos += *len as i32; + qpos += *len as i64; } Cigar::Del(len) | Cigar::RefSkip(len) => { - pos += *len as i32; + pos += *len as i64; } Cigar::HardClip(_) => {} // no advance Cigar::Pad(_) => panic!("Padding (Cigar::Pad) is not supported."), //padding is only used for multiple sequence alignment @@ -154,31 +154,31 @@ impl BamRecordExtensions for bam::Record { result } - fn aligned_pairs_full(&self) -> Vec<[Option; 2]> { + fn aligned_pairs_full(&self) -> Vec<[Option; 2]> { let mut result = Vec::new(); - let mut pos: i32 = self.pos(); - let mut qpos: i32 = 0; + let mut pos: i64 = self.pos(); + let mut qpos: i64 = 0; for entry in self.cigar().iter() { match entry { Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => { - for i in pos..(pos + *len as i32) { + for i in pos..(pos + *len as i64) { result.push([Some(qpos), Some(i)]); qpos += 1; } - pos += *len as i32; + pos += *len as i64; } Cigar::Ins(len) | Cigar::SoftClip(len) => { - for i in qpos..(qpos + *len as i32) { + for i in qpos..(qpos + *len as i64) { result.push([Some(i), None]); } - qpos += *len as i32; + qpos += *len as i64; } Cigar::Del(len) | Cigar::RefSkip(len) => { - for i in pos..(pos + *len as i32) { + for i in pos..(pos + *len as i64) { result.push([None, Some(i)]); } - pos += *len as i32; + pos += *len as i64; } Cigar::HardClip(_) => {} // no advance Cigar::Pad(_) => panic!("Padding (Cigar::Pad) is not supported."), //padding is only used for multiple sequence alignment @@ -244,21 +244,21 @@ impl BamRecordExtensions for bam::Record { result } - fn reference_positions(&self) -> Vec { + fn reference_positions(&self) -> Vec { self.aligned_pairs().iter().map(|x| x[1]).collect() } - fn reference_positions_full(&self) -> Vec> { + fn reference_positions_full(&self) -> Vec> { self.aligned_pairs_full() .iter() .filter(|x| x[0].is_some()) .map(|x| x[1]) .collect() } - fn reference_start(&self) -> i32 { + fn reference_start(&self) -> i64 { self.pos() } - fn reference_end(&self) -> i32 { + fn reference_end(&self) -> i64 { unsafe { htslib::bam_endpos(self.inner_ptr()) } } @@ -4304,11 +4304,11 @@ mod tests { // //for the rest, we just verify that they have the expected amount of None in each position - fn some_count(pairs: &Vec<[Option; 2]>, pos: usize) -> i32 { - pairs.iter().filter(|x| x[pos].is_some()).count() as i32 + fn some_count(pairs: &Vec<[Option; 2]>, pos: usize) -> i64 { + pairs.iter().filter(|x| x[pos].is_some()).count() as i64 } - fn none_count(pairs: &Vec<[Option; 2]>, pos: usize) -> i32 { - pairs.iter().filter(|x| x[pos].is_none()).count() as i32 + fn none_count(pairs: &Vec<[Option; 2]>, pos: usize) -> i64 { + pairs.iter().filter(|x| x[pos].is_none()).count() as i64 } let pairs = it.next().unwrap().unwrap().aligned_pairs_full(); diff --git a/src/bam/mod.rs b/src/bam/mod.rs index add016986..bd371c8f2 100644 --- a/src/bam/mod.rs +++ b/src/bam/mod.rs @@ -16,7 +16,6 @@ pub mod record; #[cfg(feature = "serde")] pub mod record_serde; -use libc; use std::ffi; use std::path::Path; use std::slice; @@ -29,6 +28,7 @@ pub use crate::bam::buffer::RecordBuffer; pub use crate::bam::errors::{Error, Result}; pub use crate::bam::header::Header; pub use crate::bam::record::Record; +use std::convert::TryInto; /// Implementation for `Read::set_threads` and `Writer::set_threads`. pub unsafe fn set_threads(htsfile: *mut htslib::htsFile, n_threads: usize) -> Result<()> { @@ -398,7 +398,7 @@ impl IndexedReader { if let Some(itr) = self.itr { unsafe { htslib::hts_itr_destroy(itr) } } - let itr = unsafe { htslib::sam_itr_queryi(self.idx, tid as i32, beg as i32, end as i32) }; + let itr = unsafe { htslib::sam_itr_queryi(self.idx, tid as i32, beg as i64, end as i64) }; if itr.is_null() { self.itr = None; Err(Error::Fetch) @@ -590,10 +590,13 @@ impl Writer { ); //println!("{}", str::from_utf8(&header_string).unwrap()); - let rec = htslib::sam_hdr_parse((l_text + 1) as i32, text as *const i8); + let rec = htslib::sam_hdr_parse( + ((l_text + 1) as usize).try_into().unwrap(), + text as *const i8, + ); (*rec).text = text as *mut i8; - (*rec).l_text = l_text as u32; + (*rec).l_text = l_text as u64; rec }; @@ -682,12 +685,10 @@ impl CompressionLevel { // Convert and check the variants of the `CompressionLevel` enum to a numeric level fn convert(self) -> Result { match self { - CompressionLevel::Uncompressed => Ok(htslib::Z_NO_COMPRESSION), - CompressionLevel::Fastest => Ok(htslib::Z_BEST_SPEED), - CompressionLevel::Maximum => Ok(htslib::Z_BEST_COMPRESSION), - CompressionLevel::Level(i @ htslib::Z_NO_COMPRESSION..=htslib::Z_BEST_COMPRESSION) => { - Ok(i) - } + CompressionLevel::Uncompressed => Ok(0), + CompressionLevel::Fastest => Ok(1), + CompressionLevel::Maximum => Ok(9), + CompressionLevel::Level(i @ 0..=9) => Ok(i), CompressionLevel::Level(i) => Err(Error::InvalidCompressionLevel { level: i }), } } @@ -815,9 +816,9 @@ impl HeaderView { header_string.len(), ); - let rec = htslib::sam_hdr_parse((l_text + 1) as i32, text as *const i8); + let rec = htslib::sam_hdr_parse((l_text + 1) as u64, text as *const i8); (*rec).text = text as *mut i8; - (*rec).l_text = l_text as u32; + (*rec).l_text = l_text as u64; rec }; @@ -853,7 +854,7 @@ impl HeaderView { pub fn tid(&self, name: &[u8]) -> Option { let c_str = ffi::CString::new(name).expect("Expected valid name."); - let tid = unsafe { htslib::bam_name2id(self.inner, c_str.as_ptr()) }; + let tid = unsafe { htslib::sam_hdr_name2tid(self.inner, c_str.as_ptr()) }; if tid < 0 { None } else { @@ -895,7 +896,7 @@ impl HeaderView { impl Clone for HeaderView { fn clone(&self) -> Self { HeaderView { - inner: unsafe { htslib::bam_hdr_dup(self.inner) }, + inner: unsafe { htslib::sam_hdr_dup(self.inner) }, owned: true, } } @@ -905,7 +906,7 @@ impl Drop for HeaderView { fn drop(&mut self) { if self.owned { unsafe { - htslib::bam_hdr_destroy(self.inner); + htslib::sam_hdr_destroy(self.inner); } } } @@ -977,6 +978,28 @@ CCCCCCCCCCCCCCCCCCC"[..], (names, flags, seqs, quals, cigars) } + fn compare_inner_bam_cram_records(cram_records: &Vec, bam_records: &Vec) { + // Selectively compares bam1_t struct fields from BAM and CRAM + for (c1, b1) in cram_records.iter().zip(bam_records.iter()) { + // CRAM vs BAM l_data is off by 3, see: https://github.com/rust-bio/rust-htslib/pull/184#issuecomment-590133544 + // The rest of the fields should be identical: + assert_eq!(c1.cigar(), b1.cigar()); + assert_eq!(c1.inner().core.pos, b1.inner().core.pos); + assert_eq!(c1.inner().core.mpos, b1.inner().core.mpos); + assert_eq!(c1.inner().core.mtid, b1.inner().core.mtid); + assert_eq!(c1.inner().core.tid, b1.inner().core.tid); + assert_eq!(c1.inner().core.bin, b1.inner().core.bin); + assert_eq!(c1.inner().core.qual, b1.inner().core.qual); + assert_eq!(c1.inner().core.l_extranul, b1.inner().core.l_extranul); + assert_eq!(c1.inner().core.flag, b1.inner().core.flag); + assert_eq!(c1.inner().core.l_qname, b1.inner().core.l_qname); + assert_eq!(c1.inner().core.n_cigar, b1.inner().core.n_cigar); + assert_eq!(c1.inner().core.l_qseq, b1.inner().core.l_qseq); + assert_eq!(c1.inner().core.isize, b1.inner().core.isize); + //... except m_data + } + } + #[test] fn test_read() { let (names, flags, seqs, quals, cigars) = gold(); @@ -1379,7 +1402,7 @@ CCCCCCCCCCCCCCCCCCC"[..], let idx = i % names.len(); rec.set(names[idx], Some(&cigars[idx]), seqs[idx], quals[idx]); rec.push_aux(b"NM", &Aux::Integer(15)); - rec.set_pos(i as i32); + rec.set_pos(i as i64); bam.write(&mut rec).expect("Failed to write record."); } @@ -1395,7 +1418,7 @@ CCCCCCCCCCCCCCCCCCC"[..], let rec = _rec.expect("Failed to read record."); - assert_eq!(rec.pos(), i as i32); + assert_eq!(rec.pos(), i as i64); assert_eq!(rec.qname(), names[idx]); assert_eq!(*rec.cigar(), cigars[idx]); assert_eq!(rec.seq().as_bytes(), seqs[idx]); @@ -1559,10 +1582,7 @@ CCCCCCCCCCCCCCCCCCC"[..], let mut bam_reader = Reader::from_path(bam_path).unwrap(); let bam_records: Vec = bam_reader.records().map(|v| v.unwrap()).collect(); - // Compare CRAM records to BAM records - for (c1, b1) in cram_records.iter().zip(bam_records.iter()) { - assert!(c1 == b1); - } + compare_inner_bam_cram_records(&cram_records, &bam_records); } #[test] @@ -1631,9 +1651,7 @@ CCCCCCCCCCCCCCCCCCC"[..], let cram_records: Vec = cram_reader.records().map(|v| v.unwrap()).collect(); // Compare CRAM records to BAM records - for (c1, b1) in cram_records.iter().zip(bam_records.iter()) { - assert!(c1 == b1); - } + compare_inner_bam_cram_records(&cram_records, &bam_records); } tmp.close().expect("Failed to delete temp dir"); diff --git a/src/bam/record.rs b/src/bam/record.rs index a0c6d33c8..f31eb1ae7 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -26,7 +26,7 @@ use bio_types::sequence::SequenceRead; /// A macro creating methods for flag access. macro_rules! flag { - ($get:ident, $set:ident, $unset:ident, $bit:expr) => ( + ($get:ident, $set:ident, $unset:ident, $bit:expr) => { pub fn $get(&self) -> bool { self.inner().core.flag & $bit != 0 } @@ -38,7 +38,7 @@ macro_rules! flag { pub fn $unset(&mut self) { self.inner_mut().core.flag &= !$bit; } - ) + }; } /// A BAM record. @@ -141,8 +141,8 @@ impl Record { let mut sam_string = htslib::kstring_t { s: sam_copy.as_ptr() as *mut i8, - l: sam_copy.len() as usize, - m: sam_copy.len() as usize, + l: sam_copy.len() as u64, + m: sam_copy.len() as u64, }; let succ = unsafe { @@ -197,12 +197,12 @@ impl Record { } /// Get position (0-based). - pub fn pos(&self) -> i32 { + pub fn pos(&self) -> i64 { self.inner().core.pos } /// Set position (0-based). - pub fn set_pos(&mut self, pos: i32) { + pub fn set_pos(&mut self, pos: i64) { self.inner_mut().core.pos = pos; } @@ -250,22 +250,22 @@ impl Record { } /// Get mate position. - pub fn mpos(&self) -> i32 { + pub fn mpos(&self) -> i64 { self.inner().core.mpos } /// Set mate position. - pub fn set_mpos(&mut self, mpos: i32) { + pub fn set_mpos(&mut self, mpos: i64) { self.inner_mut().core.mpos = mpos; } /// Get insert size. - pub fn insert_size(&self) -> i32 { + pub fn insert_size(&self) -> i64 { self.inner().core.isize } /// Set insert size. - pub fn set_insert_size(&mut self, insert_size: i32) { + pub fn set_insert_size(&mut self, insert_size: i64) { self.inner_mut().core.isize = insert_size; } @@ -351,7 +351,7 @@ impl Record { data[qname.len() + i] = b'\0'; } let mut i = q_len + extranul; - self.inner_mut().core.l_qname = i as u8; + self.inner_mut().core.l_qname = i as u16; self.inner_mut().core.l_extranul = extranul as u8; // cigar @@ -434,7 +434,7 @@ impl Record { for i in 0..=extranul { data[new_q_len - i - 1] = b'\0'; } - self.inner_mut().core.l_qname = new_q_len as u8; + self.inner_mut().core.l_qname = new_q_len as u16; self.inner_mut().core.l_extranul = extranul as u8; } @@ -919,7 +919,7 @@ custom_derive! { impl CigarString { /// Create a `CigarStringView` from this CigarString at position `pos` - pub fn into_view(self, pos: i32) -> CigarStringView { + pub fn into_view(self, pos: i64) -> CigarStringView { CigarStringView::new(self, pos) } @@ -1065,17 +1065,17 @@ impl fmt::Display for CigarString { #[derive(Eq, PartialEq, Clone, Debug)] pub struct CigarStringView { inner: CigarString, - pos: i32, + pos: i64, } impl CigarStringView { /// Construct a new CigarStringView from a CigarString at a position - pub fn new(c: CigarString, pos: i32) -> CigarStringView { + pub fn new(c: CigarString, pos: i64) -> CigarStringView { CigarStringView { inner: c, pos } } /// Get (exclusive) end position of alignment. - pub fn end_pos(&self) -> i32 { + pub fn end_pos(&self) -> i64 { let mut pos = self.pos; for c in self { match c { @@ -1083,7 +1083,7 @@ impl CigarStringView { | Cigar::RefSkip(l) | Cigar::Del(l) | Cigar::Equal(l) - | Cigar::Diff(l) => pos += *l as i32, + | Cigar::Diff(l) => pos += *l as i64, // these don't add to end_pos on reference Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (), } diff --git a/src/bcf/header.rs b/src/bcf/header.rs index aeacb562a..087e7424e 100644 --- a/src/bcf/header.rs +++ b/src/bcf/header.rs @@ -243,7 +243,7 @@ impl HeaderView { #[inline] fn inner(&self) -> htslib::bcf_hdr_t { - unsafe { (*self.inner) } + unsafe { *self.inner } } /// Get the number of samples defined in the header. @@ -333,7 +333,8 @@ impl HeaderView { _ => return Err(Error::UnexpectedType { tag: tag_desc() }), }; let length = match length as ::libc::c_uint { - htslib::BCF_VL_FIXED => TagLength::Fixed(num_values), + // XXX: Hacky "as u32" cast. Trace back through unsafe{} towards BCF struct and rollback to proper type + htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32), htslib::BCF_VL_VAR => TagLength::Variable, htslib::BCF_VL_A => TagLength::AltAlleles, htslib::BCF_VL_R => TagLength::Alleles, diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs index b9dd6969f..b72cd1721 100644 --- a/src/bcf/mod.rs +++ b/src/bcf/mod.rs @@ -228,7 +228,7 @@ impl IndexedReader { pub fn fetch(&mut self, rid: u32, start: u32, end: u32) -> Result<()> { let contig = self.header.rid2name(rid).unwrap(); let contig = ffi::CString::new(contig).unwrap(); - if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i32) } != 0 { + if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 { Err(Error::Seek { contig: contig.to_str().unwrap().to_owned(), start, @@ -443,7 +443,7 @@ pub mod synced { let record = *(*(*self.inner).readers.offset(idx as isize)) .buffer .offset(0); - if (*record).rid != (rid as i32) || (*record).pos >= (end as i32) { + if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) { return Ok(0); } } @@ -505,7 +505,7 @@ pub mod synced { let contig = self.header(0).rid2name(rid).unwrap(); //.clone(); ffi::CString::new(contig).unwrap() }; - if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i32) } != 0 { + if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 { Err(Error::Seek { contig: contig.to_str().unwrap().to_owned(), start, diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 52bcb6368..edf7d46df 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -165,7 +165,7 @@ impl Record { } /// Set 0-based position. - pub fn set_pos(&mut self, pos: i32) { + pub fn set_pos(&mut self, pos: i64) { self.inner_mut().pos = pos; } @@ -589,7 +589,7 @@ impl Record { } pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<()> { - let rm_set = unsafe { htslib::kbs_init(remove.len()) }; + let rm_set = unsafe { htslib::kbs_init(remove.len() as u64) }; for (i, &r) in remove.iter().enumerate() { if r { diff --git a/src/tbx/mod.rs b/src/tbx/mod.rs index 1adda1cc7..ee2792b51 100644 --- a/src/tbx/mod.rs +++ b/src/tbx/mod.rs @@ -117,11 +117,11 @@ pub struct Reader { itr: Option<*mut htslib::hts_itr_t>, /// The currently fetch region's tid. - tid: i32, + tid: i64, /// The currently fetch region's 0-based begin pos. - start: i32, + start: i64, /// The currently fetch region's 0-based end pos. - end: i32, + end: i64, } unsafe impl Send for Reader {} @@ -196,7 +196,7 @@ impl Reader { } /// Get sequence/target ID from sequence name. - pub fn tid(&self, name: &str) -> Result { + pub fn tid(&self, name: &str) -> Result { let name_cstr = ffi::CString::new(name.as_bytes()).unwrap(); let res = unsafe { htslib::tbx_name2id(self.tbx, name_cstr.as_ptr()) }; if res < 0 { @@ -204,15 +204,15 @@ impl Reader { sequence: name.to_owned(), }) } else { - Ok(res as u32) + Ok(res as u64) } } /// Fetch region given by numeric sequence number and 0-based begin and end position. - pub fn fetch(&mut self, tid: u32, start: u32, end: u32) -> Result<()> { - self.tid = tid as i32; - self.start = start as i32; - self.end = end as i32; + pub fn fetch(&mut self, tid: u64, start: u64, end: u64) -> Result<()> { + self.tid = tid as i64; + self.start = start as i64; + self.end = end as i64; if let Some(itr) = self.itr { unsafe { @@ -223,8 +223,8 @@ impl Reader { htslib::hts_itr_query( (*self.tbx).idx, tid as i32, - start as i32, - end as i32, + start as i64, + end as i64, Some(htslib::tbx_readrec), ) }; @@ -278,7 +278,7 @@ impl Reader { } /// Return whether the two given genomic intervals overlap. -fn overlap(tid1: i32, begin1: i32, end1: i32, tid2: i32, begin2: i32, end2: i32) -> bool { +fn overlap(tid1: i64, begin1: i64, end1: i64, tid2: i64, begin2: i64, end2: i64) -> bool { (tid1 == tid2) && (begin1 < end2) && (begin2 < end1) } @@ -293,7 +293,7 @@ impl Read for Reader { htslib::hts_get_bgzfp(self.hts_file), itr, //mem::transmute(&mut self.buf), - &mut self.buf as *mut htslib::__kstring_t as *mut libc::c_void, + &mut self.buf as *mut htslib::kstring_t as *mut libc::c_void, //mem::transmute(self.tbx), self.tbx as *mut libc::c_void, ) @@ -310,7 +310,8 @@ impl Read for Reader { // returns `< 0`). let (tid, start, end) = unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) }; - if overlap(self.tid, self.start, self.end, tid, start, end) { + // XXX: Careful with this tid conversion!!! + if overlap(self.tid, self.start, self.end, tid as i64, start, end) { *record = unsafe { Vec::from(ffi::CStr::from_ptr(self.buf.s).to_str().unwrap()) }; return Ok(true); @@ -366,17 +367,11 @@ mod tests { use super::*; #[test] - fn bed_header() { + fn bed_basic() { let reader = Reader::from_path("test/test_bed3.bed.gz") .ok() .expect("Error opening file."); - // Check header lines. - assert_eq!( - reader.header, - vec![String::from("#foo"), String::from("#bar")] - ); - // Check sequence name vector. assert_eq!( reader.seqnames(), diff --git a/test/test_bed3.bed b/test/test_bed3.bed deleted file mode 100644 index db7cb61ea..000000000 --- a/test/test_bed3.bed +++ /dev/null @@ -1,5 +0,0 @@ -#foo -#bar -chr1 1001 1002 -chr1 1004 1005 -chr2 1005 1006 diff --git a/test/test_bed3.bed.gz b/test/test_bed3.bed.gz index 125be6444..5218b402d 100644 Binary files a/test/test_bed3.bed.gz and b/test/test_bed3.bed.gz differ diff --git a/test/test_bed3.bed.gz.tbi b/test/test_bed3.bed.gz.tbi index faf0054c6..f97aa1498 100644 Binary files a/test/test_bed3.bed.gz.tbi and b/test/test_bed3.bed.gz.tbi differ