-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat!: Improve bcf Record filter interface and improve docs #306
Changes from 3 commits
651b7e2
be92168
76b7f15
d31242d
b747a47
be54c2e
e5891d2
417ab39
8eb08e8
1866631
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -335,69 +335,168 @@ impl Record { | |||||
|
||||||
/// Query whether the filter with the given ID has been set. | ||||||
/// | ||||||
/// # Arguments | ||||||
/// This method can be used to check if a record passes filtering by using either `PASS` or `.` | ||||||
/// | ||||||
/// - `flt_id` - The filter ID to query for. | ||||||
pub fn has_filter(&self, flt_id: Id) -> bool { | ||||||
if *flt_id == 0 && self.inner().d.n_flt == 0 { | ||||||
return true; | ||||||
} | ||||||
for i in 0..(self.inner().d.n_flt as isize) { | ||||||
if unsafe { *self.inner().d.flt.offset(i) } == *flt_id as i32 { | ||||||
return true; | ||||||
} | ||||||
/// # Example | ||||||
/// ```rust | ||||||
/// # use rust_htslib::bcf::{Format, Header, Writer}; | ||||||
/// # use tempfile::NamedTempFile; | ||||||
/// # let tmp = tempfile::NamedTempFile::new().unwrap(); | ||||||
/// # let path = tmp.path(); | ||||||
/// let mut header = Header::new(); | ||||||
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
/// # let mut record = vcf.empty_record(); | ||||||
/// assert!(record.has_filter(b"PASS")); | ||||||
/// assert!(record.has_filter(b".")); | ||||||
/// | ||||||
/// record.push_filter(b"foo").unwrap(); | ||||||
/// assert!(record.has_filter(b"foo")); | ||||||
/// assert!(!record.has_filter(b"PASS")) | ||||||
/// ``` | ||||||
pub fn has_filter(&self, flt_id: &[u8]) -> bool { | ||||||
let filter_c_str = ffi::CString::new(flt_id).unwrap(); | ||||||
match unsafe { | ||||||
htslib::bcf_has_filter( | ||||||
self.header().inner, | ||||||
self.inner, | ||||||
filter_c_str.as_ptr() as *mut i8, | ||||||
) | ||||||
} { | ||||||
1 => true, | ||||||
_ => false, | ||||||
} | ||||||
false | ||||||
} | ||||||
|
||||||
/// Set the given filters IDs to the FILTER column. | ||||||
/// Set the given filter IDs to the FILTER column. | ||||||
/// | ||||||
/// Setting an empty slice removes all filters. | ||||||
/// Setting an empty slice removes all filters and sets `PASS`. | ||||||
/// | ||||||
/// # Arguments | ||||||
/// # Example | ||||||
/// ```rust | ||||||
/// # use rust_htslib::bcf::{Format, Header, Writer}; | ||||||
/// # use tempfile::NamedTempFile; | ||||||
/// # let tmp = tempfile::NamedTempFile::new().unwrap(); | ||||||
/// # let path = tmp.path(); | ||||||
/// let mut header = Header::new(); | ||||||
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
/// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#); | ||||||
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
/// # let mut record = vcf.empty_record(); | ||||||
/// assert!(record.has_filter(b"PASS")); | ||||||
/// record.set_filters(&[b"foo", b"bar"]).unwrap(); | ||||||
/// assert!(record.has_filter(b"foo")); | ||||||
/// assert!(record.has_filter(b"bar")); | ||||||
/// assert!(!record.has_filter(b"PASS")); | ||||||
/// record.set_filters(&[]).unwrap(); | ||||||
/// assert!(record.has_filter(b"PASS")); | ||||||
/// assert!(!record.has_filter(b"foo")); | ||||||
/// // 'baz' isn't in the header | ||||||
/// assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) | ||||||
/// ``` | ||||||
/// | ||||||
/// # Errors | ||||||
/// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned. | ||||||
/// | ||||||
/// - `flt_ids` - The identifiers of the filter values to set. | ||||||
pub fn set_filters(&mut self, flt_ids: &[Id]) { | ||||||
let mut flt_ids: Vec<i32> = flt_ids.iter().map(|x| **x as i32).collect(); | ||||||
pub fn set_filters(&mut self, flt_ids: &[&[u8]]) -> Result<()> { | ||||||
// let id = self.header().name_to_id(flt_id)?; | ||||||
let mut ids: Vec<i32> = flt_ids | ||||||
.iter() | ||||||
.map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32))) | ||||||
.collect::<Result<Vec<i32>>>()?; | ||||||
unsafe { | ||||||
htslib::bcf_update_filter( | ||||||
self.header().inner, | ||||||
self.inner, | ||||||
flt_ids.as_mut_ptr(), | ||||||
flt_ids.len() as i32, | ||||||
ids.as_mut_ptr(), | ||||||
ids.len() as i32, | ||||||
); | ||||||
} | ||||||
Ok(()) | ||||||
} | ||||||
|
||||||
/// Add the given filter to the FILTER column. | ||||||
/// | ||||||
/// If `val` corresponds to `"PASS"` then all existing filters are removed first. If other than | ||||||
/// `"PASS"`, then existing `"PASS"` is removed. | ||||||
/// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise, | ||||||
/// any existing `PASS` filter is removed. | ||||||
/// | ||||||
/// # Arguments | ||||||
/// # Example | ||||||
/// ```rust | ||||||
/// # use rust_htslib::bcf::{Format, Header, Writer}; | ||||||
/// # use tempfile::NamedTempFile; | ||||||
/// # let tmp = tempfile::NamedTempFile::new().unwrap(); | ||||||
/// # let path = tmp.path(); | ||||||
/// let mut header = Header::new(); | ||||||
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
/// # let mut record = vcf.empty_record(); | ||||||
/// assert!(record.has_filter(b"PASS")); | ||||||
/// | ||||||
/// record.push_filter(b"foo").unwrap(); | ||||||
/// assert!(record.has_filter(b"foo")); | ||||||
/// assert!(!record.has_filter(b"PASS")); | ||||||
/// // filter must exist in the header | ||||||
/// assert!(record.push_filter(b"bar").is_err()) | ||||||
/// ``` | ||||||
/// | ||||||
/// - `flt_id` - The corresponding filter ID value to add. | ||||||
pub fn push_filter(&mut self, flt_id: Id) { | ||||||
/// # Errors | ||||||
/// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. | ||||||
/// | ||||||
pub fn push_filter(&mut self, flt_id: &[u8]) -> Result<()> { | ||||||
let id = self.header().name_to_id(flt_id)?; | ||||||
unsafe { | ||||||
htslib::bcf_add_filter(self.header().inner, self.inner, *flt_id as i32); | ||||||
} | ||||||
htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32); | ||||||
}; | ||||||
Ok(()) | ||||||
} | ||||||
|
||||||
/// Remove the given filter from the FILTER column. | ||||||
/// | ||||||
/// # Arguments | ||||||
/// | ||||||
/// - `val` - The corresponding filter ID to remove. | ||||||
/// - `pass_on_empty` - Set to "PASS" when removing the last value. | ||||||
pub fn remove_filter(&mut self, flt_id: Id, pass_on_empty: bool) { | ||||||
/// - `flt_id` - The corresponding filter ID to remove. | ||||||
/// - `pass_on_empty` - Set to `PASS` when removing the last filter. | ||||||
/// | ||||||
/// # Example | ||||||
/// ```rust | ||||||
/// # use rust_htslib::bcf::{Format, Header, Writer}; | ||||||
/// # use tempfile::NamedTempFile; | ||||||
/// # let tmp = tempfile::NamedTempFile::new().unwrap(); | ||||||
/// # let path = tmp.path(); | ||||||
/// let mut header = Header::new(); | ||||||
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
/// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#); | ||||||
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
/// # let mut record = vcf.empty_record(); | ||||||
/// record.set_filters(&[b"foo", b"bar"]).unwrap(); | ||||||
/// assert!(record.has_filter(b"foo")); | ||||||
/// assert!(record.has_filter(b"bar")); | ||||||
/// | ||||||
/// record.remove_filter(b"foo", true).unwrap(); | ||||||
/// assert!(!record.has_filter(b"foo")); | ||||||
/// assert!(record.has_filter(b"bar")); | ||||||
/// // 'baz' is not in the header | ||||||
/// assert!(record.remove_filter(b"baz", true).is_err()); | ||||||
/// | ||||||
/// record.remove_filter(b"bar", true).unwrap(); | ||||||
/// assert!(!record.has_filter(b"bar")); | ||||||
/// assert!(record.has_filter(b"PASS")); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe also add another example with |
||||||
/// ``` | ||||||
/// | ||||||
/// # Errors | ||||||
/// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned. | ||||||
/// | ||||||
pub fn remove_filter(&mut self, flt_id: &[u8], pass_on_empty: bool) -> Result<()> { | ||||||
let id = self.header().name_to_id(flt_id)?; | ||||||
unsafe { | ||||||
htslib::bcf_remove_filter( | ||||||
self.header().inner, | ||||||
self.inner, | ||||||
*flt_id as i32, | ||||||
*id as i32, | ||||||
pass_on_empty as i32, | ||||||
); | ||||||
} | ||||||
) | ||||||
}; | ||||||
Ok(()) | ||||||
} | ||||||
|
||||||
/// Get alleles strings. | ||||||
|
@@ -558,6 +657,37 @@ impl Record { | |||||
}) | ||||||
} | ||||||
|
||||||
/// Retrieve data for a `FORMAT` field | ||||||
/// | ||||||
/// # Example | ||||||
/// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing) | ||||||
/// for an example of the setup used here.* | ||||||
/// | ||||||
/// ```rust | ||||||
/// # use rust_htslib::bcf::{Format, Writer}; | ||||||
/// # use rust_htslib::bcf::header::Header; | ||||||
/// # | ||||||
/// # // Create minimal VCF header with a single sample | ||||||
/// # let mut header = Header::new(); | ||||||
/// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#); | ||||||
/// # | ||||||
/// # // Write uncompressed VCF to stdout with above header and get an empty record | ||||||
/// # let mut vcf = Writer::from_stdout(&header, true, Format::VCF).unwrap(); | ||||||
/// # let mut record = vcf.empty_record(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe make this line visible in the docs? So that it is clear that
Suggested change
|
||||||
/// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field"); | ||||||
/// | ||||||
/// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field"); | ||||||
/// let sample1_depth = read_depths[0]; | ||||||
/// assert_eq!(sample1_depth, &[20]); | ||||||
/// let sample2_depth = read_depths[1]; | ||||||
/// assert_eq!(sample2_depth, &[12]) | ||||||
/// ``` | ||||||
/// | ||||||
/// # Errors | ||||||
/// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer) | ||||||
/// (`read_depths`), which holds the data, has to be kept in scope as long as the data is | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does |
||||||
/// accessed. If parts of the data are accessed after the `BufferBacked` object is been | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo:
Suggested change
|
||||||
/// dropped, you will access unallocated memory. | ||||||
pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> { | ||||||
self.format_shared_buffer(tag, Buffer::new()) | ||||||
} | ||||||
|
@@ -1222,7 +1352,7 @@ impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> { | |||||
/// Get format data as integers. | ||||||
/// | ||||||
/// **Attention:** the returned BufferBacked which holds the data has to be kept in scope | ||||||
/// as along as the data is accessed. If parts of the data are accessed while | ||||||
/// as long as the data is accessed. If parts of the data are accessed while | ||||||
/// the BufferBacked object is already dropped, you will access unallocated | ||||||
/// memory. | ||||||
pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> { | ||||||
|
@@ -1373,4 +1503,87 @@ mod tests { | |||||
assert_eq!(record.sample_count(), 0); | ||||||
assert_eq!(record.pos(), 0) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_record_has_filter_pass_is_default() { | ||||||
let tmp = NamedTempFile::new().unwrap(); | ||||||
let path = tmp.path(); | ||||||
let header = Header::new(); | ||||||
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
let record = vcf.empty_record(); | ||||||
|
||||||
assert!(record.has_filter(b"PASS")); | ||||||
assert!(record.has_filter(b".")); | ||||||
assert!(!record.has_filter(b"foo")) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_record_has_filter_custom() { | ||||||
let tmp = NamedTempFile::new().unwrap(); | ||||||
let path = tmp.path(); | ||||||
let mut header = Header::new(); | ||||||
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
let mut record = vcf.empty_record(); | ||||||
record.push_filter(b"foo").unwrap(); | ||||||
|
||||||
assert!(record.has_filter(b"foo")); | ||||||
assert!(!record.has_filter(b"PASS")) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_record_push_filter() { | ||||||
let tmp = NamedTempFile::new().unwrap(); | ||||||
let path = tmp.path(); | ||||||
let mut header = Header::new(); | ||||||
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
let mut record = vcf.empty_record(); | ||||||
assert!(record.has_filter(b"PASS")); | ||||||
record.push_filter(b"foo").unwrap(); | ||||||
assert!(record.has_filter(b"foo")); | ||||||
assert!(!record.has_filter(b"PASS")); | ||||||
assert!(record.push_filter(b"bar").is_err()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_record_set_filters() { | ||||||
let tmp = NamedTempFile::new().unwrap(); | ||||||
let path = tmp.path(); | ||||||
let mut header = Header::new(); | ||||||
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#); | ||||||
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
let mut record = vcf.empty_record(); | ||||||
assert!(record.has_filter(b"PASS")); | ||||||
record.set_filters(&[b"foo", b"bar"]).unwrap(); | ||||||
assert!(record.has_filter(b"foo")); | ||||||
assert!(record.has_filter(b"bar")); | ||||||
assert!(!record.has_filter(b"PASS")); | ||||||
record.set_filters(&[]).unwrap(); | ||||||
assert!(record.has_filter(b"PASS")); | ||||||
assert!(!record.has_filter(b"foo")); | ||||||
assert!(record.set_filters(&[b"foo", b"baz"]).is_err()) | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_record_remove_filter() { | ||||||
let tmp = NamedTempFile::new().unwrap(); | ||||||
let path = tmp.path(); | ||||||
let mut header = Header::new(); | ||||||
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#); | ||||||
header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#); | ||||||
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap(); | ||||||
let mut record = vcf.empty_record(); | ||||||
record.set_filters(&[b"foo", b"bar"]).unwrap(); | ||||||
assert!(record.has_filter(b"foo")); | ||||||
assert!(record.has_filter(b"bar")); | ||||||
record.remove_filter(b"foo", true).unwrap(); | ||||||
assert!(!record.has_filter(b"foo")); | ||||||
assert!(record.has_filter(b"bar")); | ||||||
assert!(record.remove_filter(b"baz", true).is_err()); | ||||||
record.remove_filter(b"bar", true).unwrap(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above, I'd also add the |
||||||
assert!(!record.has_filter(b"bar")); | ||||||
assert!(record.has_filter(b"PASS")); | ||||||
} | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe rename
flt_ids
to something likeflt_names
(as inname_to_id()
)? Although ID refers to the respective field where the filter name is given in the header, it also refers to the number assigned to that filter name internally. So maybeflt_names
is less ambiguous?