From 651b7e2f776e4b52644fb6a8935fbb5d712b2a14 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 13 May 2021 10:23:14 +1000 Subject: [PATCH 1/7] add docs for bcf Record::format --- src/bcf/record.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 71ef477c5..bd86dd03b 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -558,6 +558,37 @@ impl Record { }) } + /// Retrieve data for a `FORMAT` field + /// + /// # Example + /// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing) + /// for an example of the setup used here.* + /// + /// ```rust + /// # use rust_htslib::bcf::{Format, Writer}; + /// # use rust_htslib::bcf::header::Header; + /// # + /// # // Create minimal VCF header with a single sample + /// # let mut header = Header::new(); + /// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT="#); + /// # + /// # // Write uncompressed VCF to stdout with above header and get an empty record + /// # let mut vcf = Writer::from_stdout(&header, true, Format::VCF).unwrap(); + /// # let mut record = vcf.empty_record(); + /// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field"); + /// + /// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field"); + /// let sample1_depth = read_depths[0]; + /// assert_eq!(sample1_depth, &[20]); + /// let sample2_depth = read_depths[1]; + /// assert_eq!(sample2_depth, &[12]) + /// ``` + /// + /// # Errors + /// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer) + /// (`read_depths`), which holds the data, has to be kept in scope as long as the data is + /// accessed. If parts of the data are accessed after the `BufferBacked` object is been + /// dropped, you will access unallocated memory. pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> { self.format_shared_buffer(tag, Buffer::new()) } @@ -1222,7 +1253,7 @@ impl<'a, 'b, B: BorrowMut + Borrow + 'b> Format<'a, B> { /// Get format data as integers. /// /// **Attention:** the returned BufferBacked which holds the data has to be kept in scope - /// as along as the data is accessed. If parts of the data are accessed while + /// as long as the data is accessed. If parts of the data are accessed while /// the BufferBacked object is already dropped, you will access unallocated /// memory. pub fn integer(mut self) -> Result, B>> { From be9216895dcba84bbe8c821619f58e6cce2f5297 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 14 May 2021 12:30:15 +1000 Subject: [PATCH 2/7] change signature for filter methods --- CHANGELOG.md | 4 + src/bcf/mod.rs | 9 +- src/bcf/record.rs | 269 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 229 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af869da89..e0d9a07a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Added - `bcf::Record` methods `end`, `clear`, and `rlen` (@mbhall88) +### Changes +- `bcf::Record` methods `has_filter`, `remove_filter`, `push_filter`, and `set_filter` + all now take a byte slice (`&[u8]`) instead of an `Id`. + [Unreleased]: https://github.com/rust-bio/rust-htslib/compare/v0.36.0...HEAD ## [0.36.0] - 2020-11-23 diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs index 65e75c277..dc8216a49 100644 --- a/src/bcf/mod.rs +++ b/src/bcf/mod.rs @@ -1230,7 +1230,6 @@ mod tests { Format::VCF, ) .expect("Error opening file."); - let header = writer.header().clone(); // Setup empty record, filled below. let mut record = writer.empty_record(); @@ -1254,10 +1253,10 @@ mod tests { record.push_id(b"first_id").unwrap(); assert!(record.filters().next().is_none()); - record.set_filters(&[header.name_to_id(b"q10").unwrap()]); - record.push_filter(header.name_to_id(b"s50").unwrap()); - record.remove_filter(header.name_to_id(b"q10").unwrap(), true); - record.push_filter(header.name_to_id(b"q10").unwrap()); + record.set_filters(&[b"q10"]).unwrap(); + record.push_filter(b"s50").unwrap(); + record.remove_filter(b"q10", true).unwrap(); + record.push_filter(b"q10").unwrap(); record.set_alleles(&[b"C", b"T", b"G"]).unwrap(); diff --git a/src/bcf/record.rs b/src/bcf/record.rs index bd86dd03b..2fc1bf043 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -120,7 +120,7 @@ impl<'a, T: 'a + fmt::Debug, B: Borrow + 'a> Deref for BufferBacked<'a, } impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow + 'a> fmt::Display - for BufferBacked<'a, T, B> +for BufferBacked<'a, T, B> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.value, f) @@ -335,69 +335,159 @@ impl Record { /// Query whether the filter with the given ID has been set. /// - /// # Arguments + /// This method can be used to check if a record passes filtering by using either `PASS` or `.` /// - /// - `flt_id` - The filter ID to query for. - pub fn has_filter(&self, flt_id: Id) -> bool { - if *flt_id == 0 && self.inner().d.n_flt == 0 { - return true; - } - for i in 0..(self.inner().d.n_flt as isize) { - if unsafe { *self.inner().d.flt.offset(i) } == *flt_id as i32 { - return true; - } + /// # Example + /// ```rust + /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use tempfile::NamedTempFile; + /// # let tmp = tempfile::NamedTempFile::new().unwrap(); + /// # let path = tmp.path(); + /// let mut header = Header::new(); + /// header.push_record(br#"##FILTER="#); + /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let mut record = vcf.empty_record(); + /// assert!(record.has_filter(b"PASS")); + /// assert!(record.has_filter(b".")); + /// + /// record.push_filter(b"foo").unwrap(); + /// assert!(record.has_filter(b"foo")); + /// assert!(!record.has_filter(b"PASS")) + /// ``` + pub fn has_filter(&self, flt_id: &[u8]) -> bool { + let filter_c_str = ffi::CString::new(flt_id).unwrap(); + match unsafe { htslib::bcf_has_filter(self.header().inner, self.inner, filter_c_str.as_ptr() as *mut i8) } { + 1 => true, + _ => false } - false } - /// Set the given filters IDs to the FILTER column. + /// Set the given filter IDs to the FILTER column. /// - /// Setting an empty slice removes all filters. + /// Setting an empty slice removes all filters and sets `PASS`. /// - /// # Arguments + /// # Example + /// ```rust + /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use tempfile::NamedTempFile; + /// # let tmp = tempfile::NamedTempFile::new().unwrap(); + /// # let path = tmp.path(); + /// let mut header = Header::new(); + /// header.push_record(br#"##FILTER="#); + /// header.push_record(br#"##FILTER="#); + /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let mut record = vcf.empty_record(); + /// assert!(record.has_filter(b"PASS")); + /// record.set_filters(&[b"foo", b"bar"]).unwrap(); + /// assert!(record.has_filter(b"foo")); + /// assert!(record.has_filter(b"bar")); + /// assert!(!record.has_filter(b"PASS")); + /// record.set_filters(&[]).unwrap(); + /// assert!(record.has_filter(b"PASS")); + /// assert!(!record.has_filter(b"foo")); + /// // 'baz' isn't in the header + /// assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) + /// ``` /// - /// - `flt_ids` - The identifiers of the filter values to set. - pub fn set_filters(&mut self, flt_ids: &[Id]) { - let mut flt_ids: Vec = flt_ids.iter().map(|x| **x as i32).collect(); + /// # Errors + /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned. + /// + pub fn set_filters(&mut self, flt_ids: &[&[u8]]) -> Result<()> { + // let id = self.header().name_to_id(flt_id)?; + let mut ids: Vec = flt_ids.iter().map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32))).collect::>>()?; unsafe { htslib::bcf_update_filter( self.header().inner, self.inner, - flt_ids.as_mut_ptr(), - flt_ids.len() as i32, + ids.as_mut_ptr(), + ids.len() as i32, ); } + Ok(()) } /// Add the given filter to the FILTER column. /// - /// If `val` corresponds to `"PASS"` then all existing filters are removed first. If other than - /// `"PASS"`, then existing `"PASS"` is removed. + /// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise, + /// any existing `PASS` filter is removed. /// - /// # Arguments + /// # Example + /// ```rust + /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use tempfile::NamedTempFile; + /// # let tmp = tempfile::NamedTempFile::new().unwrap(); + /// # let path = tmp.path(); + /// let mut header = Header::new(); + /// header.push_record(br#"##FILTER="#); + /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let mut record = vcf.empty_record(); + /// assert!(record.has_filter(b"PASS")); + /// + /// record.push_filter(b"foo").unwrap(); + /// assert!(record.has_filter(b"foo")); + /// assert!(!record.has_filter(b"PASS")); + /// // filter must exist in the header + /// assert!(record.push_filter(b"bar").is_err()) + /// ``` /// - /// - `flt_id` - The corresponding filter ID value to add. - pub fn push_filter(&mut self, flt_id: Id) { + /// # Errors + /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. + /// + pub fn push_filter(&mut self, flt_id: &[u8]) -> Result<()> { + let id = self.header().name_to_id(flt_id)?; unsafe { - htslib::bcf_add_filter(self.header().inner, self.inner, *flt_id as i32); - } + htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32); + }; + Ok(()) } /// Remove the given filter from the FILTER column. /// /// # Arguments /// - /// - `val` - The corresponding filter ID to remove. - /// - `pass_on_empty` - Set to "PASS" when removing the last value. - pub fn remove_filter(&mut self, flt_id: Id, pass_on_empty: bool) { + /// - `flt_id` - The corresponding filter ID to remove. + /// - `pass_on_empty` - Set to `PASS` when removing the last filter. + /// + /// # Example + /// ```rust + /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use tempfile::NamedTempFile; + /// # let tmp = tempfile::NamedTempFile::new().unwrap(); + /// # let path = tmp.path(); + /// let mut header = Header::new(); + /// header.push_record(br#"##FILTER="#); + /// header.push_record(br#"##FILTER="#); + /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let mut record = vcf.empty_record(); + /// record.set_filters(&[b"foo", b"bar"]).unwrap(); + /// assert!(record.has_filter(b"foo")); + /// assert!(record.has_filter(b"bar")); + /// + /// record.remove_filter(b"foo", true).unwrap(); + /// assert!(!record.has_filter(b"foo")); + /// assert!(record.has_filter(b"bar")); + /// // 'baz' is not in the header + /// assert!(record.remove_filter(b"baz", true).is_err()); + /// + /// record.remove_filter(b"bar", true).unwrap(); + /// assert!(!record.has_filter(b"bar")); + /// assert!(record.has_filter(b"PASS")); + /// ``` + /// + /// # Errors + /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. + /// + pub fn remove_filter(&mut self, flt_id: &[u8], pass_on_empty: bool) -> Result<()> { + let id = self.header().name_to_id(flt_id)?; unsafe { htslib::bcf_remove_filter( self.header().inner, self.inner, - *flt_id as i32, + *id as i32, pass_on_empty as i32, - ); - } + ) + }; + Ok(()) } /// Get alleles strings. @@ -550,8 +640,8 @@ impl Record { } pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result> - where - B: BorrowMut + Borrow + 'a, + where + B: BorrowMut + Borrow + 'a, { Ok(Genotypes { encoded: self.format_shared_buffer(b"GT", buffer).integer()?, @@ -933,7 +1023,7 @@ impl genome::AbstractLocus for Record { .rid2name(self.rid().expect("rid not set")) .expect("unable to find rid in header"), ) - .expect("unable to interpret contig name as UTF-8") + .expect("unable to interpret contig name as UTF-8") } fn pos(&self) -> u64 { @@ -953,8 +1043,8 @@ pub enum GenotypeAllele { impl GenotypeAllele { /// Decode given integer according to BCF standard. #[deprecated( - since = "0.36.0", - note = "Please use the conversion trait From for GenotypeAllele instead." + since = "0.36.0", + note = "Please use the conversion trait From for GenotypeAllele instead." )] pub fn from_encoded(encoded: i32) -> Self { match (encoded, encoded & 1) { @@ -1032,8 +1122,8 @@ impl fmt::Display for Genotype { /// Lazy representation of genotypes, that does no computation until a particular genotype is queried. #[derive(Debug)] pub struct Genotypes<'a, B> -where - B: Borrow + 'a, + where + B: Borrow + 'a, { encoded: BufferBacked<'a, Vec<&'a [i32]>, B>, } @@ -1154,14 +1244,14 @@ impl<'a, 'b, B: BorrowMut + Borrow + 'b> Info<'a, B> { unsafe { slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize) } - .split(|c| *c == b',') - .map(|s| { - // stop at zero character - s.split(|c| *c == 0u8) - .next() - .expect("Bug: returned string should not be empty.") - }) - .collect(), + .split(|c| *c == b',') + .map(|s| { + // stop at zero character + s.split(|c| *c == 0u8) + .next() + .expect("Bug: returned string should not be empty.") + }) + .collect(), self.buffer, ) }) @@ -1404,4 +1494,87 @@ mod tests { assert_eq!(record.sample_count(), 0); assert_eq!(record.pos(), 0) } + + #[test] + fn test_record_has_filter_pass_is_default() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path(); + let header = Header::new(); + let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let record = vcf.empty_record(); + + assert!(record.has_filter(b"PASS")); + assert!(record.has_filter(b".")); + assert!(!record.has_filter(b"foo")) + } + + #[test] + fn test_record_has_filter_custom() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path(); + let mut header = Header::new(); + header.push_record(br#"##FILTER="#); + let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let mut record = vcf.empty_record(); + record.push_filter(b"foo").unwrap(); + + assert!(record.has_filter(b"foo")); + assert!(!record.has_filter(b"PASS")) + } + + #[test] + fn test_record_push_filter() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path(); + let mut header = Header::new(); + header.push_record(br#"##FILTER="#); + let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let mut record = vcf.empty_record(); + assert!(record.has_filter(b"PASS")); + record.push_filter(b"foo").unwrap(); + assert!(record.has_filter(b"foo")); + assert!(!record.has_filter(b"PASS")); + assert!(record.push_filter(b"bar").is_err()) + } + + #[test] + fn test_record_set_filters() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path(); + let mut header = Header::new(); + header.push_record(br#"##FILTER="#); + header.push_record(br#"##FILTER="#); + let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let mut record = vcf.empty_record(); + assert!(record.has_filter(b"PASS")); + record.set_filters(&[b"foo", b"bar"]).unwrap(); + assert!(record.has_filter(b"foo")); + assert!(record.has_filter(b"bar")); + assert!(!record.has_filter(b"PASS")); + record.set_filters(&[]).unwrap(); + assert!(record.has_filter(b"PASS")); + assert!(!record.has_filter(b"foo")); + assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) + } + + #[test] + fn test_record_remove_filter() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path(); + let mut header = Header::new(); + header.push_record(br#"##FILTER="#); + header.push_record(br#"##FILTER="#); + let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let mut record = vcf.empty_record(); + record.set_filters(&[b"foo", b"bar"]).unwrap(); + assert!(record.has_filter(b"foo")); + assert!(record.has_filter(b"bar")); + record.remove_filter(b"foo", true).unwrap(); + assert!(!record.has_filter(b"foo")); + assert!(record.has_filter(b"bar")); + assert!(record.remove_filter(b"baz", true).is_err()); + record.remove_filter(b"bar", true).unwrap(); + assert!(!record.has_filter(b"bar")); + assert!(record.has_filter(b"PASS")); + } } From 76b7f1501fd10fb7dd3a032f94ae7a135cdcf3c2 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 14 May 2021 12:39:15 +1000 Subject: [PATCH 3/7] fmt --- src/bcf/record.rs | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 2fc1bf043..5505af915 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -120,7 +120,7 @@ impl<'a, T: 'a + fmt::Debug, B: Borrow + 'a> Deref for BufferBacked<'a, } impl<'a, T: 'a + fmt::Debug + fmt::Display, B: Borrow + 'a> fmt::Display -for BufferBacked<'a, T, B> + for BufferBacked<'a, T, B> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.value, f) @@ -356,9 +356,15 @@ impl Record { /// ``` pub fn has_filter(&self, flt_id: &[u8]) -> bool { let filter_c_str = ffi::CString::new(flt_id).unwrap(); - match unsafe { htslib::bcf_has_filter(self.header().inner, self.inner, filter_c_str.as_ptr() as *mut i8) } { + match unsafe { + htslib::bcf_has_filter( + self.header().inner, + self.inner, + filter_c_str.as_ptr() as *mut i8, + ) + } { 1 => true, - _ => false + _ => false, } } @@ -394,7 +400,10 @@ impl Record { /// pub fn set_filters(&mut self, flt_ids: &[&[u8]]) -> Result<()> { // let id = self.header().name_to_id(flt_id)?; - let mut ids: Vec = flt_ids.iter().map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32))).collect::>>()?; + let mut ids: Vec = flt_ids + .iter() + .map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32))) + .collect::>>()?; unsafe { htslib::bcf_update_filter( self.header().inner, @@ -640,8 +649,8 @@ impl Record { } pub fn genotypes_shared_buffer<'a, B>(&self, buffer: B) -> Result> - where - B: BorrowMut + Borrow + 'a, + where + B: BorrowMut + Borrow + 'a, { Ok(Genotypes { encoded: self.format_shared_buffer(b"GT", buffer).integer()?, @@ -1023,7 +1032,7 @@ impl genome::AbstractLocus for Record { .rid2name(self.rid().expect("rid not set")) .expect("unable to find rid in header"), ) - .expect("unable to interpret contig name as UTF-8") + .expect("unable to interpret contig name as UTF-8") } fn pos(&self) -> u64 { @@ -1043,8 +1052,8 @@ pub enum GenotypeAllele { impl GenotypeAllele { /// Decode given integer according to BCF standard. #[deprecated( - since = "0.36.0", - note = "Please use the conversion trait From for GenotypeAllele instead." + since = "0.36.0", + note = "Please use the conversion trait From for GenotypeAllele instead." )] pub fn from_encoded(encoded: i32) -> Self { match (encoded, encoded & 1) { @@ -1122,8 +1131,8 @@ impl fmt::Display for Genotype { /// Lazy representation of genotypes, that does no computation until a particular genotype is queried. #[derive(Debug)] pub struct Genotypes<'a, B> - where - B: Borrow + 'a, +where + B: Borrow + 'a, { encoded: BufferBacked<'a, Vec<&'a [i32]>, B>, } @@ -1244,14 +1253,14 @@ impl<'a, 'b, B: BorrowMut + Borrow + 'b> Info<'a, B> { unsafe { slice::from_raw_parts(self.buffer.borrow().inner as *const u8, ret as usize) } - .split(|c| *c == b',') - .map(|s| { - // stop at zero character - s.split(|c| *c == 0u8) - .next() - .expect("Bug: returned string should not be empty.") - }) - .collect(), + .split(|c| *c == b',') + .map(|s| { + // stop at zero character + s.split(|c| *c == 0u8) + .next() + .expect("Bug: returned string should not be empty.") + }) + .collect(), self.buffer, ) }) From d31242dc2354b6c9d5c6b70f38b3f46106f87c2b Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 18 May 2021 09:56:51 +1000 Subject: [PATCH 4/7] add FilterId trait and switch has_filter implementation --- src/bcf/record.rs | 154 ++++++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 59 deletions(-) diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 5505af915..5392841a1 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -79,6 +79,30 @@ impl NumericUtils for i32 { } } +/// A trait to allow for seamless use of bytes or integer identifiers for filters +pub trait FilterId { + fn id_from_header(&self, header: &HeaderView) -> Result; + fn is_pass(&self) -> bool; +} + +impl FilterId for [u8] { + fn id_from_header(&self, header: &HeaderView) -> Result { + header.name_to_id(self) + } + fn is_pass(&self) -> bool { + matches!(self, b"PASS" | b".") + } +} + +impl FilterId for Id { + fn id_from_header(&self, _header: &HeaderView) -> Result { + Ok(*self) + } + fn is_pass(&self) -> bool { + *self == Id(0) + } +} + /// A buffer for info or format data. #[derive(Debug)] pub struct Buffer { @@ -335,11 +359,13 @@ impl Record { /// Query whether the filter with the given ID has been set. /// - /// This method can be used to check if a record passes filtering by using either `PASS` or `.` + /// This method can be used to check if a record passes filtering by using either `Id(0)`, + /// `PASS` or `.` /// /// # Example /// ```rust /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use rust_htslib::bcf::header::Id; /// # use tempfile::NamedTempFile; /// # let tmp = tempfile::NamedTempFile::new().unwrap(); /// # let path = tmp.path(); @@ -347,25 +373,28 @@ impl Record { /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); - /// assert!(record.has_filter(b"PASS")); - /// assert!(record.has_filter(b".")); + /// assert!(record.has_filter("PASS".as_bytes())); + /// assert!(record.has_filter(".".as_bytes())); + /// assert!(record.has_filter(&Id(0))); /// /// record.push_filter(b"foo").unwrap(); - /// assert!(record.has_filter(b"foo")); - /// assert!(!record.has_filter(b"PASS")) + /// assert!(record.has_filter("foo".as_bytes())); + /// assert!(!record.has_filter("PASS".as_bytes())) /// ``` - pub fn has_filter(&self, flt_id: &[u8]) -> bool { - let filter_c_str = ffi::CString::new(flt_id).unwrap(); - match unsafe { - htslib::bcf_has_filter( - self.header().inner, - self.inner, - filter_c_str.as_ptr() as *mut i8, - ) - } { - 1 => true, - _ => false, + pub fn has_filter(&self, flt_id: &T) -> bool { + if flt_id.is_pass() && self.inner().d.n_flt == 0 { + return true; + } + let id = match flt_id.id_from_header(&self.header()) { + Ok(i) => *i, + Err(_) => return false, + }; + for i in 0..(self.inner().d.n_flt as isize) { + if unsafe { *self.inner().d.flt.offset(i) } == id as i32 { + return true; + } } + false } /// Set the given filter IDs to the FILTER column. @@ -383,14 +412,14 @@ impl Record { /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); - /// assert!(record.has_filter(b"PASS")); + /// assert!(record.has_filter("PASS".as_bytes())); /// record.set_filters(&[b"foo", b"bar"]).unwrap(); - /// assert!(record.has_filter(b"foo")); - /// assert!(record.has_filter(b"bar")); - /// assert!(!record.has_filter(b"PASS")); + /// assert!(record.has_filter("foo".as_bytes())); + /// assert!(record.has_filter("bar".as_bytes())); + /// assert!(!record.has_filter("PASS".as_bytes())); /// record.set_filters(&[]).unwrap(); - /// assert!(record.has_filter(b"PASS")); - /// assert!(!record.has_filter(b"foo")); + /// assert!(record.has_filter("PASS".as_bytes())); + /// assert!(!record.has_filter("foo".as_bytes())); /// // 'baz' isn't in the header /// assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) /// ``` @@ -430,11 +459,10 @@ impl Record { /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); - /// assert!(record.has_filter(b"PASS")); + /// assert!(record.has_filter("PASS".as_bytes())); /// /// record.push_filter(b"foo").unwrap(); - /// assert!(record.has_filter(b"foo")); - /// assert!(!record.has_filter(b"PASS")); + /// assert!(record.has_filter("foo".as_bytes())); /// // filter must exist in the header /// assert!(record.push_filter(b"bar").is_err()) /// ``` @@ -468,19 +496,21 @@ impl Record { /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); - /// record.set_filters(&[b"foo", b"bar"]).unwrap(); - /// assert!(record.has_filter(b"foo")); - /// assert!(record.has_filter(b"bar")); - /// - /// record.remove_filter(b"foo", true).unwrap(); - /// assert!(!record.has_filter(b"foo")); - /// assert!(record.has_filter(b"bar")); + /// let foo = "foo".as_bytes(); + /// let bar = "bar".as_bytes(); + /// record.set_filters(&[foo, bar]).unwrap(); + /// assert!(record.has_filter(foo)); + /// assert!(record.has_filter(bar)); + /// + /// record.remove_filter(foo, true).unwrap(); + /// assert!(!record.has_filter(foo)); + /// assert!(record.has_filter(bar)); /// // 'baz' is not in the header /// assert!(record.remove_filter(b"baz", true).is_err()); /// - /// record.remove_filter(b"bar", true).unwrap(); - /// assert!(!record.has_filter(b"bar")); - /// assert!(record.has_filter(b"PASS")); + /// record.remove_filter(bar, true).unwrap(); + /// assert!(!record.has_filter(bar)); + /// assert!(record.has_filter("PASS".as_bytes())); /// ``` /// /// # Errors @@ -1512,9 +1542,11 @@ mod tests { let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let record = vcf.empty_record(); - assert!(record.has_filter(b"PASS")); - assert!(record.has_filter(b".")); - assert!(!record.has_filter(b"foo")) + assert!(record.has_filter("PASS".as_bytes())); + assert!(record.has_filter(".".as_bytes())); + assert!(record.has_filter(&Id(0))); + assert!(!record.has_filter("foo".as_bytes())); + assert!(!record.has_filter(&Id(2))); } #[test] @@ -1527,8 +1559,8 @@ mod tests { let mut record = vcf.empty_record(); record.push_filter(b"foo").unwrap(); - assert!(record.has_filter(b"foo")); - assert!(!record.has_filter(b"PASS")) + assert!(record.has_filter("foo".as_bytes())); + assert!(!record.has_filter("PASS".as_bytes())) } #[test] @@ -1539,11 +1571,11 @@ mod tests { header.push_record(br#"##FILTER="#); let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); - assert!(record.has_filter(b"PASS")); - record.push_filter(b"foo").unwrap(); - assert!(record.has_filter(b"foo")); - assert!(!record.has_filter(b"PASS")); - assert!(record.push_filter(b"bar").is_err()) + assert!(record.has_filter("PASS".as_bytes())); + record.push_filter("foo".as_bytes()).unwrap(); + assert!(record.has_filter("foo".as_bytes())); + assert!(!record.has_filter("PASS".as_bytes())); + assert!(record.push_filter("bar".as_bytes()).is_err()) } #[test] @@ -1555,15 +1587,19 @@ mod tests { header.push_record(br#"##FILTER="#); let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); - assert!(record.has_filter(b"PASS")); - record.set_filters(&[b"foo", b"bar"]).unwrap(); - assert!(record.has_filter(b"foo")); - assert!(record.has_filter(b"bar")); - assert!(!record.has_filter(b"PASS")); + assert!(record.has_filter("PASS".as_bytes())); + record + .set_filters(&["foo".as_bytes(), "bar".as_bytes()]) + .unwrap(); + assert!(record.has_filter("foo".as_bytes())); + assert!(record.has_filter("bar".as_bytes())); + assert!(!record.has_filter("PASS".as_bytes())); record.set_filters(&[]).unwrap(); - assert!(record.has_filter(b"PASS")); - assert!(!record.has_filter(b"foo")); - assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) + assert!(record.has_filter("PASS".as_bytes())); + assert!(!record.has_filter("foo".as_bytes())); + assert!(record + .set_filters(&["foo".as_bytes(), "baz".as_bytes()]) + .is_err()) } #[test] @@ -1576,14 +1612,14 @@ mod tests { let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); record.set_filters(&[b"foo", b"bar"]).unwrap(); - assert!(record.has_filter(b"foo")); - assert!(record.has_filter(b"bar")); + assert!(record.has_filter("foo".as_bytes())); + assert!(record.has_filter("bar".as_bytes())); record.remove_filter(b"foo", true).unwrap(); - assert!(!record.has_filter(b"foo")); - assert!(record.has_filter(b"bar")); + assert!(!record.has_filter("foo".as_bytes())); + assert!(record.has_filter("bar".as_bytes())); assert!(record.remove_filter(b"baz", true).is_err()); record.remove_filter(b"bar", true).unwrap(); - assert!(!record.has_filter(b"bar")); - assert!(record.has_filter(b"PASS")); + assert!(!record.has_filter("bar".as_bytes())); + assert!(record.has_filter("PASS".as_bytes())); } } From b747a47b30644266487e3a4f2caa24198ee0c766 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 18 May 2021 10:05:40 +1000 Subject: [PATCH 5/7] update push_filter impl --- src/bcf/mod.rs | 4 ++-- src/bcf/record.rs | 25 +++++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs index dc8216a49..a290c1a65 100644 --- a/src/bcf/mod.rs +++ b/src/bcf/mod.rs @@ -1254,9 +1254,9 @@ mod tests { assert!(record.filters().next().is_none()); record.set_filters(&[b"q10"]).unwrap(); - record.push_filter(b"s50").unwrap(); + record.push_filter("s50".as_bytes()).unwrap(); record.remove_filter(b"q10", true).unwrap(); - record.push_filter(b"q10").unwrap(); + record.push_filter("q10".as_bytes()).unwrap(); record.set_alleles(&[b"C", b"T", b"G"]).unwrap(); diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 5392841a1..7671185c3 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -377,7 +377,7 @@ impl Record { /// assert!(record.has_filter(".".as_bytes())); /// assert!(record.has_filter(&Id(0))); /// - /// record.push_filter(b"foo").unwrap(); + /// record.push_filter("foo".as_bytes()).unwrap(); /// assert!(record.has_filter("foo".as_bytes())); /// assert!(!record.has_filter("PASS".as_bytes())) /// ``` @@ -457,21 +457,26 @@ impl Record { /// # let path = tmp.path(); /// let mut header = Header::new(); /// header.push_record(br#"##FILTER="#); + /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); + /// let foo = "foo".as_bytes(); + /// let bar = record.header().name_to_id(b"bar").unwrap(); /// assert!(record.has_filter("PASS".as_bytes())); /// - /// record.push_filter(b"foo").unwrap(); - /// assert!(record.has_filter("foo".as_bytes())); + /// record.push_filter(foo).unwrap(); + /// record.push_filter(&bar).unwrap(); + /// assert!(record.has_filter(foo)); + /// assert!(record.has_filter(&bar)); /// // filter must exist in the header - /// assert!(record.push_filter(b"bar").is_err()) + /// assert!(record.push_filter("baz".as_bytes()).is_err()) /// ``` /// /// # Errors /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. /// - pub fn push_filter(&mut self, flt_id: &[u8]) -> Result<()> { - let id = self.header().name_to_id(flt_id)?; + pub fn push_filter(&mut self, flt_id: &T) -> Result<()> { + let id = flt_id.id_from_header(self.header())?; unsafe { htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32); }; @@ -1557,7 +1562,7 @@ mod tests { header.push_record(br#"##FILTER="#); let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); - record.push_filter(b"foo").unwrap(); + record.push_filter("foo".as_bytes()).unwrap(); assert!(record.has_filter("foo".as_bytes())); assert!(!record.has_filter("PASS".as_bytes())) @@ -1569,13 +1574,17 @@ mod tests { let path = tmp.path(); let mut header = Header::new(); header.push_record(br#"##FILTER="#); + header.push_record(br#"##FILTER="#); let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); assert!(record.has_filter("PASS".as_bytes())); record.push_filter("foo".as_bytes()).unwrap(); + let bar = record.header().name_to_id(b"bar").unwrap(); + record.push_filter(&bar).unwrap(); assert!(record.has_filter("foo".as_bytes())); + assert!(record.has_filter(&bar)); assert!(!record.has_filter("PASS".as_bytes())); - assert!(record.push_filter("bar".as_bytes()).is_err()) + assert!(record.push_filter("baz".as_bytes()).is_err()) } #[test] From be54c2e119a18eaa6f346e8dc26312925153606a Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 18 May 2021 10:52:38 +1000 Subject: [PATCH 6/7] update remove and set filter impl --- src/bcf/mod.rs | 4 ++-- src/bcf/record.rs | 58 +++++++++++++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs index a290c1a65..7510b8bbe 100644 --- a/src/bcf/mod.rs +++ b/src/bcf/mod.rs @@ -1253,9 +1253,9 @@ mod tests { record.push_id(b"first_id").unwrap(); assert!(record.filters().next().is_none()); - record.set_filters(&[b"q10"]).unwrap(); + record.set_filters(&["q10".as_bytes()]).unwrap(); record.push_filter("s50".as_bytes()).unwrap(); - record.remove_filter(b"q10", true).unwrap(); + record.remove_filter("q10".as_bytes(), true).unwrap(); record.push_filter("q10".as_bytes()).unwrap(); record.set_alleles(&[b"C", b"T", b"G"]).unwrap(); diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 7671185c3..587117229 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -404,6 +404,7 @@ impl Record { /// # Example /// ```rust /// # use rust_htslib::bcf::{Format, Header, Writer}; + /// # use rust_htslib::bcf::header::Id; /// # use tempfile::NamedTempFile; /// # let tmp = tempfile::NamedTempFile::new().unwrap(); /// # let path = tmp.path(); @@ -412,26 +413,32 @@ impl Record { /// header.push_record(br#"##FILTER="#); /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); /// # let mut record = vcf.empty_record(); + /// let foo = record.header().name_to_id(b"foo").unwrap(); + /// let bar = record.header().name_to_id(b"bar").unwrap(); /// assert!(record.has_filter("PASS".as_bytes())); - /// record.set_filters(&[b"foo", b"bar"]).unwrap(); - /// assert!(record.has_filter("foo".as_bytes())); - /// assert!(record.has_filter("bar".as_bytes())); + /// let mut filters = vec![&foo, &bar]; + /// record.set_filters(&filters).unwrap(); + /// assert!(record.has_filter(&foo)); + /// assert!(record.has_filter(&bar)); /// assert!(!record.has_filter("PASS".as_bytes())); - /// record.set_filters(&[]).unwrap(); + /// filters.clear(); + /// record.set_filters(&filters).unwrap(); /// assert!(record.has_filter("PASS".as_bytes())); /// assert!(!record.has_filter("foo".as_bytes())); /// // 'baz' isn't in the header - /// assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) + /// assert!(record.set_filters(&["baz".as_bytes()]).is_err()) /// ``` /// /// # Errors /// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned. /// - pub fn set_filters(&mut self, flt_ids: &[&[u8]]) -> Result<()> { - // let id = self.header().name_to_id(flt_id)?; + pub fn set_filters(&mut self, flt_ids: &[&T]) -> Result<()> { let mut ids: Vec = flt_ids .iter() - .map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32))) + .map(|id| { + id.id_from_header(self.header()) + .and_then(|id| Ok(*id as i32)) + }) .collect::>>()?; unsafe { htslib::bcf_update_filter( @@ -440,7 +447,7 @@ impl Record { ids.as_mut_ptr(), ids.len() as i32, ); - } + }; Ok(()) } @@ -511,7 +518,7 @@ impl Record { /// assert!(!record.has_filter(foo)); /// assert!(record.has_filter(bar)); /// // 'baz' is not in the header - /// assert!(record.remove_filter(b"baz", true).is_err()); + /// assert!(record.remove_filter("baz".as_bytes(), true).is_err()); /// /// record.remove_filter(bar, true).unwrap(); /// assert!(!record.has_filter(bar)); @@ -521,8 +528,12 @@ impl Record { /// # Errors /// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. /// - pub fn remove_filter(&mut self, flt_id: &[u8], pass_on_empty: bool) -> Result<()> { - let id = self.header().name_to_id(flt_id)?; + pub fn remove_filter( + &mut self, + flt_id: &T, + pass_on_empty: bool, + ) -> Result<()> { + let id = flt_id.id_from_header(self.header())?; unsafe { htslib::bcf_remove_filter( self.header().inner, @@ -1603,7 +1614,8 @@ mod tests { assert!(record.has_filter("foo".as_bytes())); assert!(record.has_filter("bar".as_bytes())); assert!(!record.has_filter("PASS".as_bytes())); - record.set_filters(&[]).unwrap(); + let filters: &[&Id] = &[]; + record.set_filters(filters).unwrap(); assert!(record.has_filter("PASS".as_bytes())); assert!(!record.has_filter("foo".as_bytes())); assert!(record @@ -1620,15 +1632,17 @@ mod tests { header.push_record(br#"##FILTER="#); let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); let mut record = vcf.empty_record(); - record.set_filters(&[b"foo", b"bar"]).unwrap(); - assert!(record.has_filter("foo".as_bytes())); - assert!(record.has_filter("bar".as_bytes())); - record.remove_filter(b"foo", true).unwrap(); - assert!(!record.has_filter("foo".as_bytes())); - assert!(record.has_filter("bar".as_bytes())); - assert!(record.remove_filter(b"baz", true).is_err()); - record.remove_filter(b"bar", true).unwrap(); - assert!(!record.has_filter("bar".as_bytes())); + let foo = record.header().name_to_id(b"foo").unwrap(); + let bar = record.header().name_to_id(b"bar").unwrap(); + record.set_filters(&[&foo, &bar]).unwrap(); + assert!(record.has_filter(&foo)); + assert!(record.has_filter(&bar)); + record.remove_filter(&foo, true).unwrap(); + assert!(!record.has_filter(&foo)); + assert!(record.has_filter(&bar)); + assert!(record.remove_filter("baz".as_bytes(), true).is_err()); + record.remove_filter(&bar, true).unwrap(); + assert!(!record.has_filter(&bar)); assert!(record.has_filter("PASS".as_bytes())); } } From 417ab399ed9d8c9284eb38af3fdcce8b5ae0e179 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 6 Jul 2021 08:37:30 +1000 Subject: [PATCH 7/7] fix format enum spelling for vcf --- src/bcf/record.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/bcf/record.rs b/src/bcf/record.rs index 61fb44d03..12c43bc3f 100644 --- a/src/bcf/record.rs +++ b/src/bcf/record.rs @@ -371,7 +371,7 @@ impl Record { /// # let path = tmp.path(); /// let mut header = Header::new(); /// header.push_record(br#"##FILTER="#); - /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); /// # let mut record = vcf.empty_record(); /// assert!(record.has_filter("PASS".as_bytes())); /// assert!(record.has_filter(".".as_bytes())); @@ -411,7 +411,7 @@ impl Record { /// let mut header = Header::new(); /// header.push_record(br#"##FILTER="#); /// header.push_record(br#"##FILTER="#); - /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); /// # let mut record = vcf.empty_record(); /// let foo = record.header().name_to_id(b"foo").unwrap(); /// let bar = record.header().name_to_id(b"bar").unwrap(); @@ -435,10 +435,7 @@ impl Record { pub fn set_filters(&mut self, flt_ids: &[&T]) -> Result<()> { let mut ids: Vec = flt_ids .iter() - .map(|id| { - id.id_from_header(self.header()) - .and_then(|id| Ok(*id as i32)) - }) + .map(|id| id.id_from_header(self.header()).map(|id| *id as i32)) .collect::>>()?; unsafe { htslib::bcf_update_filter( @@ -465,7 +462,7 @@ impl Record { /// let mut header = Header::new(); /// header.push_record(br#"##FILTER="#); /// header.push_record(br#"##FILTER="#); - /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); /// # let mut record = vcf.empty_record(); /// let foo = "foo".as_bytes(); /// let bar = record.header().name_to_id(b"bar").unwrap(); @@ -506,7 +503,7 @@ impl Record { /// let mut header = Header::new(); /// header.push_record(br#"##FILTER="#); /// header.push_record(br#"##FILTER="#); - /// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + /// # let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); /// # let mut record = vcf.empty_record(); /// let foo = "foo".as_bytes(); /// let bar = "bar".as_bytes(); @@ -1555,7 +1552,7 @@ mod tests { let tmp = NamedTempFile::new().unwrap(); let path = tmp.path(); let header = Header::new(); - let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); let record = vcf.empty_record(); assert!(record.has_filter("PASS".as_bytes())); @@ -1571,7 +1568,7 @@ mod tests { let path = tmp.path(); let mut header = Header::new(); header.push_record(br#"##FILTER="#); - let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); let mut record = vcf.empty_record(); record.push_filter("foo".as_bytes()).unwrap(); @@ -1586,7 +1583,7 @@ mod tests { let mut header = Header::new(); header.push_record(br#"##FILTER="#); header.push_record(br#"##FILTER="#); - let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); let mut record = vcf.empty_record(); assert!(record.has_filter("PASS".as_bytes())); record.push_filter("foo".as_bytes()).unwrap(); @@ -1605,7 +1602,7 @@ mod tests { let mut header = Header::new(); header.push_record(br#"##FILTER="#); header.push_record(br#"##FILTER="#); - let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); let mut record = vcf.empty_record(); assert!(record.has_filter("PASS".as_bytes())); record @@ -1630,7 +1627,7 @@ mod tests { let mut header = Header::new(); header.push_record(br#"##FILTER="#); header.push_record(br#"##FILTER="#); - let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); + let vcf = Writer::from_path(path, &header, true, Format::Vcf).unwrap(); let mut record = vcf.empty_record(); let foo = record.header().name_to_id(b"foo").unwrap(); let bar = record.header().name_to_id(b"bar").unwrap();