Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Improve bcf Record filter interface and improve docs #306

Merged
merged 10 commits into from
Jul 6, 2021
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
### Added
- `bcf::Record` methods `end`, `clear`, and `rlen` (@mbhall88)

### Changes
- `bcf::Record` methods `has_filter`, `remove_filter`, `push_filter`, and `set_filter`
all now take a byte slice (`&[u8]`) instead of an `Id`.

[Unreleased]: https://github.com/rust-bio/rust-htslib/compare/v0.36.0...HEAD

## [0.36.0] - 2020-11-23
Expand Down
9 changes: 4 additions & 5 deletions src/bcf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,6 @@ mod tests {
Format::VCF,
)
.expect("Error opening file.");
let header = writer.header().clone();

// Setup empty record, filled below.
let mut record = writer.empty_record();
Expand All @@ -1254,10 +1253,10 @@ mod tests {
record.push_id(b"first_id").unwrap();

assert!(record.filters().next().is_none());
record.set_filters(&[header.name_to_id(b"q10").unwrap()]);
record.push_filter(header.name_to_id(b"s50").unwrap());
record.remove_filter(header.name_to_id(b"q10").unwrap(), true);
record.push_filter(header.name_to_id(b"q10").unwrap());
record.set_filters(&[b"q10"]).unwrap();
record.push_filter(b"s50").unwrap();
record.remove_filter(b"q10", true).unwrap();
record.push_filter(b"q10").unwrap();

record.set_alleles(&[b"C", b"T", b"G"]).unwrap();

Expand Down
279 changes: 246 additions & 33 deletions src/bcf/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,69 +335,168 @@ impl Record {

/// Query whether the filter with the given ID has been set.
///
/// # Arguments
/// This method can be used to check if a record passes filtering by using either `PASS` or `.`
///
/// - `flt_id` - The filter ID to query for.
pub fn has_filter(&self, flt_id: Id) -> bool {
if *flt_id == 0 && self.inner().d.n_flt == 0 {
return true;
}
for i in 0..(self.inner().d.n_flt as isize) {
if unsafe { *self.inner().d.flt.offset(i) } == *flt_id as i32 {
return true;
}
/// # Example
/// ```rust
/// # use rust_htslib::bcf::{Format, Header, Writer};
/// # use tempfile::NamedTempFile;
/// # let tmp = tempfile::NamedTempFile::new().unwrap();
/// # let path = tmp.path();
/// let mut header = Header::new();
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
/// assert!(record.has_filter(b"PASS"));
/// assert!(record.has_filter(b"."));
///
/// record.push_filter(b"foo").unwrap();
/// assert!(record.has_filter(b"foo"));
/// assert!(!record.has_filter(b"PASS"))
/// ```
pub fn has_filter(&self, flt_id: &[u8]) -> bool {
let filter_c_str = ffi::CString::new(flt_id).unwrap();
match unsafe {
htslib::bcf_has_filter(
self.header().inner,
self.inner,
filter_c_str.as_ptr() as *mut i8,
)
} {
1 => true,
_ => false,
}
false
}

/// Set the given filters IDs to the FILTER column.
/// Set the given filter IDs to the FILTER column.
///
/// Setting an empty slice removes all filters.
/// Setting an empty slice removes all filters and sets `PASS`.
///
/// # Arguments
/// # Example
/// ```rust
/// # use rust_htslib::bcf::{Format, Header, Writer};
/// # use tempfile::NamedTempFile;
/// # let tmp = tempfile::NamedTempFile::new().unwrap();
/// # let path = tmp.path();
/// let mut header = Header::new();
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
/// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
/// assert!(record.has_filter(b"PASS"));
/// record.set_filters(&[b"foo", b"bar"]).unwrap();
/// assert!(record.has_filter(b"foo"));
/// assert!(record.has_filter(b"bar"));
/// assert!(!record.has_filter(b"PASS"));
/// record.set_filters(&[]).unwrap();
/// assert!(record.has_filter(b"PASS"));
/// assert!(!record.has_filter(b"foo"));
/// // 'baz' isn't in the header
/// assert!(record.set_filters(&[b"foo", b"baz"]).is_err())
/// ```
///
/// # Errors
/// If any of the filter IDs do not exist in the header, an [`Error::BcfUnknownID`] is returned.
///
/// - `flt_ids` - The identifiers of the filter values to set.
pub fn set_filters(&mut self, flt_ids: &[Id]) {
let mut flt_ids: Vec<i32> = flt_ids.iter().map(|x| **x as i32).collect();
pub fn set_filters(&mut self, flt_ids: &[&[u8]]) -> Result<()> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe rename flt_ids to something like flt_names (as in name_to_id())? Although ID refers to the respective field where the filter name is given in the header, it also refers to the number assigned to that filter name internally. So maybe flt_names is less ambiguous?

// let id = self.header().name_to_id(flt_id)?;
let mut ids: Vec<i32> = flt_ids
.iter()
.map(|id| self.header().name_to_id(id).and_then(|id| Ok(*id as i32)))
.collect::<Result<Vec<i32>>>()?;
unsafe {
htslib::bcf_update_filter(
self.header().inner,
self.inner,
flt_ids.as_mut_ptr(),
flt_ids.len() as i32,
ids.as_mut_ptr(),
ids.len() as i32,
);
}
Ok(())
}

/// Add the given filter to the FILTER column.
///
/// If `val` corresponds to `"PASS"` then all existing filters are removed first. If other than
/// `"PASS"`, then existing `"PASS"` is removed.
/// If `flt_id` is `PASS` or `.` then all existing filters are removed first. Otherwise,
/// any existing `PASS` filter is removed.
///
/// # Arguments
/// # Example
/// ```rust
/// # use rust_htslib::bcf::{Format, Header, Writer};
/// # use tempfile::NamedTempFile;
/// # let tmp = tempfile::NamedTempFile::new().unwrap();
/// # let path = tmp.path();
/// let mut header = Header::new();
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
/// assert!(record.has_filter(b"PASS"));
///
/// record.push_filter(b"foo").unwrap();
/// assert!(record.has_filter(b"foo"));
/// assert!(!record.has_filter(b"PASS"));
/// // filter must exist in the header
/// assert!(record.push_filter(b"bar").is_err())
/// ```
///
/// - `flt_id` - The corresponding filter ID value to add.
pub fn push_filter(&mut self, flt_id: Id) {
/// # Errors
/// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
///
pub fn push_filter(&mut self, flt_id: &[u8]) -> Result<()> {
let id = self.header().name_to_id(flt_id)?;
unsafe {
htslib::bcf_add_filter(self.header().inner, self.inner, *flt_id as i32);
}
htslib::bcf_add_filter(self.header().inner, self.inner, *id as i32);
};
Ok(())
}

/// Remove the given filter from the FILTER column.
///
/// # Arguments
///
/// - `val` - The corresponding filter ID to remove.
/// - `pass_on_empty` - Set to "PASS" when removing the last value.
pub fn remove_filter(&mut self, flt_id: Id, pass_on_empty: bool) {
/// - `flt_id` - The corresponding filter ID to remove.
/// - `pass_on_empty` - Set to `PASS` when removing the last filter.
///
/// # Example
/// ```rust
/// # use rust_htslib::bcf::{Format, Header, Writer};
/// # use tempfile::NamedTempFile;
/// # let tmp = tempfile::NamedTempFile::new().unwrap();
/// # let path = tmp.path();
/// let mut header = Header::new();
/// header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
/// header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
/// # let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
/// record.set_filters(&[b"foo", b"bar"]).unwrap();
/// assert!(record.has_filter(b"foo"));
/// assert!(record.has_filter(b"bar"));
///
/// record.remove_filter(b"foo", true).unwrap();
/// assert!(!record.has_filter(b"foo"));
/// assert!(record.has_filter(b"bar"));
/// // 'baz' is not in the header
/// assert!(record.remove_filter(b"baz", true).is_err());
///
/// record.remove_filter(b"bar", true).unwrap();
/// assert!(!record.has_filter(b"bar"));
/// assert!(record.has_filter(b"PASS"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also add another example with pass_on_empty=false?

/// ```
///
/// # Errors
/// If the `flt_id` does not exist in the header, an [`Error::BcfUnknownID`] is returned.
///
pub fn remove_filter(&mut self, flt_id: &[u8], pass_on_empty: bool) -> Result<()> {
let id = self.header().name_to_id(flt_id)?;
unsafe {
htslib::bcf_remove_filter(
self.header().inner,
self.inner,
*flt_id as i32,
*id as i32,
pass_on_empty as i32,
);
}
)
};
Ok(())
}

/// Get alleles strings.
Expand Down Expand Up @@ -558,6 +657,37 @@ impl Record {
})
}

/// Retrieve data for a `FORMAT` field
///
/// # Example
/// *Note: some boilerplate for the example is hidden for clarity. See [module documentation](../index.html#example-writing)
/// for an example of the setup used here.*
///
/// ```rust
/// # use rust_htslib::bcf::{Format, Writer};
/// # use rust_htslib::bcf::header::Header;
/// #
/// # // Create minimal VCF header with a single sample
/// # let mut header = Header::new();
/// header.push_sample(b"sample1").push_sample(b"sample2").push_record(br#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#);
/// #
/// # // Write uncompressed VCF to stdout with above header and get an empty record
/// # let mut vcf = Writer::from_stdout(&header, true, Format::VCF).unwrap();
/// # let mut record = vcf.empty_record();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe make this line visible in the docs? So that it is clear that record is an empty_record() to start with.

Suggested change
/// # let mut record = vcf.empty_record();
/// let mut record = vcf.empty_record();

/// record.push_format_integer(b"DP", &[20, 12]).expect("Failed to set DP format field");
///
/// let read_depths = record.format(b"DP").integer().expect("Couldn't retrieve DP field");
/// let sample1_depth = read_depths[0];
/// assert_eq!(sample1_depth, &[20]);
/// let sample2_depth = read_depths[1];
/// assert_eq!(sample2_depth, &[12])
/// ```
///
/// # Errors
/// **Attention:** the returned [`BufferBacked`] from [`integer()`](Format::integer)
/// (`read_depths`), which holds the data, has to be kept in scope as long as the data is
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does has to be kept in scope mean for the user? Do I have to manually make sure, that it is in scope? Or will the lifetime of the resulting buffer do this for me and have cargo complain if this is not ensured?

/// accessed. If parts of the data are accessed after the `BufferBacked` object is been
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo:

Suggested change
/// accessed. If parts of the data are accessed after the `BufferBacked` object is been
/// accessed. If parts of the data are accessed after the `BufferBacked` object has been

/// dropped, you will access unallocated memory.
pub fn format<'a>(&'a self, tag: &'a [u8]) -> Format<'a, Buffer> {
self.format_shared_buffer(tag, Buffer::new())
}
Expand Down Expand Up @@ -1222,7 +1352,7 @@ impl<'a, 'b, B: BorrowMut<Buffer> + Borrow<Buffer> + 'b> Format<'a, B> {
/// Get format data as integers.
///
/// **Attention:** the returned BufferBacked which holds the data has to be kept in scope
/// as along as the data is accessed. If parts of the data are accessed while
/// as long as the data is accessed. If parts of the data are accessed while
/// the BufferBacked object is already dropped, you will access unallocated
/// memory.
pub fn integer(mut self) -> Result<BufferBacked<'b, Vec<&'b [i32]>, B>> {
Expand Down Expand Up @@ -1373,4 +1503,87 @@ mod tests {
assert_eq!(record.sample_count(), 0);
assert_eq!(record.pos(), 0)
}

#[test]
fn test_record_has_filter_pass_is_default() {
let tmp = NamedTempFile::new().unwrap();
let path = tmp.path();
let header = Header::new();
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
let record = vcf.empty_record();

assert!(record.has_filter(b"PASS"));
assert!(record.has_filter(b"."));
assert!(!record.has_filter(b"foo"))
}

#[test]
fn test_record_has_filter_custom() {
let tmp = NamedTempFile::new().unwrap();
let path = tmp.path();
let mut header = Header::new();
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
let mut record = vcf.empty_record();
record.push_filter(b"foo").unwrap();

assert!(record.has_filter(b"foo"));
assert!(!record.has_filter(b"PASS"))
}

#[test]
fn test_record_push_filter() {
let tmp = NamedTempFile::new().unwrap();
let path = tmp.path();
let mut header = Header::new();
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
let mut record = vcf.empty_record();
assert!(record.has_filter(b"PASS"));
record.push_filter(b"foo").unwrap();
assert!(record.has_filter(b"foo"));
assert!(!record.has_filter(b"PASS"));
assert!(record.push_filter(b"bar").is_err())
}

#[test]
fn test_record_set_filters() {
let tmp = NamedTempFile::new().unwrap();
let path = tmp.path();
let mut header = Header::new();
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
let mut record = vcf.empty_record();
assert!(record.has_filter(b"PASS"));
record.set_filters(&[b"foo", b"bar"]).unwrap();
assert!(record.has_filter(b"foo"));
assert!(record.has_filter(b"bar"));
assert!(!record.has_filter(b"PASS"));
record.set_filters(&[]).unwrap();
assert!(record.has_filter(b"PASS"));
assert!(!record.has_filter(b"foo"));
assert!(record.set_filters(&[b"foo", b"baz"]).is_err())
}

#[test]
fn test_record_remove_filter() {
let tmp = NamedTempFile::new().unwrap();
let path = tmp.path();
let mut header = Header::new();
header.push_record(br#"##FILTER=<ID=foo,Description="sample is a foo fighter">"#);
header.push_record(br#"##FILTER=<ID=bar,Description="a horse walks into...">"#);
let vcf = Writer::from_path(path, &header, true, Format::VCF).unwrap();
let mut record = vcf.empty_record();
record.set_filters(&[b"foo", b"bar"]).unwrap();
assert!(record.has_filter(b"foo"));
assert!(record.has_filter(b"bar"));
record.remove_filter(b"foo", true).unwrap();
assert!(!record.has_filter(b"foo"));
assert!(record.has_filter(b"bar"));
assert!(record.remove_filter(b"baz", true).is_err());
record.remove_filter(b"bar", true).unwrap();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, I'd also add the false case.

assert!(!record.has_filter(b"bar"));
assert!(record.has_filter(b"PASS"));
}
}