Skip to content

Commit

Permalink
validators: Add validation errors
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Aug 31, 2023
1 parent 48b068c commit fb9f617
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 69 deletions.
6 changes: 1 addition & 5 deletions src/commands/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ where
let _ = write!(message, "{col_no}:");
}

let _ = write!(
message,
" [{}] {}: {}",
error.code, error.name, error.message
);
let _ = write!(message, " [{}] {}: {}", error.code, error.name, error.error);

message
}
Expand Down
12 changes: 6 additions & 6 deletions src/validators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,26 @@ pub enum LineType {
pub struct Error {
pub code: String,
pub name: String,
pub message: String,
pub error: Box<dyn std::error::Error>,
pub line_type: LineType,
pub col_no: Option<usize>,
}

impl Error {
pub fn new<I>(
pub fn new<E>(
code: &str,
name: &str,
message: I,
error: E,
line_type: LineType,
col_no: Option<usize>,
) -> Self
where
I: Into<String>,
E: Into<Box<dyn std::error::Error>>,
{
Self {
code: code.into(),
name: name.into(),
message: message.into(),
error: error.into(),
line_type,
col_no,
}
Expand All @@ -61,7 +61,7 @@ impl Error {

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "[{}] {}", self.code, self.message)
write!(f, "[{}] {}", self.code, self.error)
}
}

Expand Down
24 changes: 16 additions & 8 deletions src/validators/paired/names.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, PairedReadValidator, ValidationLevel},
validators::{self, LineType, PairedReadValidator, ValidationLevel},
};

/// [P001] (medium) Validator to check if each paired read name is the same, excluding interleave.
Expand All @@ -19,16 +21,15 @@ impl PairedReadValidator for NamesValidator {
ValidationLevel::Medium
}

fn validate(&self, r: &Record, s: &Record) -> Result<(), Error> {
fn validate(&self, r: &Record, s: &Record) -> Result<(), validators::Error> {
if r.name() != s.name() {
Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
format!(
"Names do not match (expected '{}', got '{}')",
String::from_utf8_lossy(r.name()),
String::from_utf8_lossy(s.name()),
),
ValidationError {
actual: String::from_utf8_lossy(s.name()).into(),
expected: String::from_utf8_lossy(r.name()).into(),
},
LineType::Name,
Some(1),
))
Expand All @@ -38,6 +39,13 @@ impl PairedReadValidator for NamesValidator {
}
}

#[derive(Debug, Error)]
#[error("names mismatch: expected '{actual}', got '{expected}'")]
struct ValidationError {
actual: String,
expected: String,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
14 changes: 10 additions & 4 deletions src/validators/single/alphabet.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, SingleReadValidator, ValidationLevel},
validators::{self, LineType, SingleReadValidator, ValidationLevel},
};

/// [S002] (medium) Validator to check if all the characters in the sequence line are included in a
Expand Down Expand Up @@ -34,13 +36,13 @@ impl SingleReadValidator for AlphabetValidator {
ValidationLevel::Medium
}

fn validate(&self, r: &Record) -> Result<(), Error> {
fn validate(&self, r: &Record) -> Result<(), validators::Error> {
for (i, &b) in r.sequence().iter().enumerate() {
if !self.alphabet[usize::from(b)] {
return Err(Error::new(
return Err(validators::Error::new(
self.code(),
self.name(),
format!("Invalid character: {}", b as char),
ValidationError(char::from(b)),
LineType::Sequence,
Some(i + 1),
));
Expand All @@ -58,6 +60,10 @@ impl Default for AlphabetValidator {
}
}

/// Error describing a sequence character rejected by the alphabet check.
///
/// Wraps the offending character, which is interpolated into the display message.
#[derive(Debug, Error)]
#[error("invalid sequence character: '{0}'")]
struct ValidationError(char);

#[cfg(test)]
mod tests {
use super::*;
Expand Down
43 changes: 29 additions & 14 deletions src/validators/single/complete.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, SingleReadValidator, ValidationLevel},
validators::{self, LineType, SingleReadValidator, ValidationLevel},
};

/// [S004] (low) Validator to check if all four record lines (name, sequence, plus line, and
/// quality) are present.
pub struct CompleteValidator;

impl CompleteValidator {
fn validate_name(&self, r: &Record) -> Result<(), Error> {
fn validate_name(&self, r: &Record) -> Result<(), validators::Error> {
if r.name().is_empty() {
Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
String::from("Incomplete record: name is empty"),
ValidationError::EmptyName,
LineType::Name,
Some(1),
))
Expand All @@ -22,12 +24,12 @@ impl CompleteValidator {
}
}

fn validate_sequence(&self, r: &Record) -> Result<(), Error> {
fn validate_sequence(&self, r: &Record) -> Result<(), validators::Error> {
if r.sequence().is_empty() {
Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
String::from("Incomplete record: sequence is empty"),
ValidationError::EmptySequence,
LineType::Sequence,
Some(1),
))
Expand All @@ -36,12 +38,12 @@ impl CompleteValidator {
}
}

fn validate_plus_line(&self, r: &Record) -> Result<(), Error> {
fn validate_plus_line(&self, r: &Record) -> Result<(), validators::Error> {
if r.plus_line().is_empty() {
Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
String::from("Incomplete record: plus line is empty"),
ValidationError::EmptyPlusLine,
LineType::PlusLine,
Some(1),
))
Expand All @@ -50,12 +52,12 @@ impl CompleteValidator {
}
}

fn validate_quality(&self, r: &Record) -> Result<(), Error> {
fn validate_quality(&self, r: &Record) -> Result<(), validators::Error> {
if r.quality_scores().is_empty() {
Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
String::from("Incomplete record: quality is empty"),
ValidationError::EmptyQuality,
LineType::Quality,
Some(1),
))
Expand All @@ -78,7 +80,7 @@ impl SingleReadValidator for CompleteValidator {
ValidationLevel::Low
}

fn validate(&self, r: &Record) -> Result<(), Error> {
fn validate(&self, r: &Record) -> Result<(), validators::Error> {
self.validate_name(r)?;
self.validate_sequence(r)?;
self.validate_plus_line(r)?;
Expand All @@ -87,6 +89,19 @@ impl SingleReadValidator for CompleteValidator {
}
}

/// Reasons a record is considered incomplete; each variant names the record line that was empty.
// Every variant deliberately shares the `Empty` prefix, which clippy's `enum_variant_names`
// lint would otherwise flag.
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Error)]
enum ValidationError {
/// The name line is empty.
#[error("empty name")]
EmptyName,
/// The sequence line is empty.
#[error("empty sequence")]
EmptySequence,
/// The plus line is empty.
#[error("empty plus line")]
EmptyPlusLine,
/// The quality line is empty.
#[error("empty quality")]
EmptyQuality,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
26 changes: 16 additions & 10 deletions src/validators/single/consistent_seq_qual.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, SingleReadValidator, ValidationLevel},
validators::{self, LineType, SingleReadValidator, ValidationLevel},
};

/// [S005] (high) Validator to check if the sequence and quality lengths are the same.
Expand All @@ -19,18 +21,15 @@ impl SingleReadValidator for ConsistentSeqQualValidator {
ValidationLevel::High
}

fn validate(&self, r: &Record) -> Result<(), Error> {
fn validate(&self, r: &Record) -> Result<(), validators::Error> {
if r.sequence().len() != r.quality_scores().len() {
let message = format!(
"Name and quality lengths do not match (expected {}, got {})",
r.sequence().len(),
r.quality_scores().len(),
);

Err(Error::new(
Err(validators::Error::new(
self.code(),
self.name(),
message,
ValidationError {
actual: r.sequence().len(),
expected: r.quality_scores().len(),
},
LineType::Sequence,
Some(1),
))
Expand All @@ -40,6 +39,13 @@ impl SingleReadValidator for ConsistentSeqQualValidator {
}
}

/// Error reported when a record's sequence and quality-score lengths differ.
///
/// NOTE(review): the display message prints `actual` after "expected" and `expected` after
/// "got". The validator currently stores the sequence length in `actual` and the quality-score
/// length in `expected`, so the rendered text reads "expected <sequence len>, got <quality len>".
/// The field names look swapped at both sites; confirm the intended naming before changing
/// either one in isolation.
#[derive(Debug, Error)]
#[error("sequence-quality scores lengths mismatch: expected {actual}, got {expected}")]
struct ValidationError {
actual: usize,
expected: usize,
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
20 changes: 11 additions & 9 deletions src/validators/single/duplicate_name.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::collections::HashMap;

use bbloom::ScalableBloomFilter;
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, SingleReadValidatorMut, ValidationLevel},
validators::{self, LineType, SingleReadValidatorMut, ValidationLevel},
};

const FALSE_POSITIVE_PROBABILITY: f64 = 0.0001;
Expand Down Expand Up @@ -107,16 +108,13 @@ impl SingleReadValidatorMut for DuplicateNameValidator {
ValidationLevel::High
}

fn validate(&mut self, r: &Record) -> Result<(), Error> {
let code = self.code();
let name = self.name();

fn validate(&mut self, r: &Record) -> Result<(), validators::Error> {
if let Some(count) = self.possible_duplicates.get_mut(r.name()) {
if *count >= 1 {
return Err(Error::new(
code,
name,
format!("Duplicate found: '{}'", String::from_utf8_lossy(r.name())),
return Err(validators::Error::new(
self.code(),
self.name(),
ValidationError(String::from_utf8_lossy(r.name()).into()),
LineType::Name,
Some(1),
));
Expand All @@ -138,6 +136,10 @@ impl Default for DuplicateNameValidator {
}
}

/// Error reported for a record name flagged as a duplicate.
///
/// Holds the offending name as text; the validator builds it with a lossy UTF-8 conversion of
/// the raw name bytes.
#[derive(Debug, Error)]
#[error("duplicate name: '{0}'")]
struct ValidationError(String);

#[cfg(test)]
mod tests {
use super::*;
Expand Down
14 changes: 10 additions & 4 deletions src/validators/single/name.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use thiserror::Error;

use crate::{
fastq::Record,
validators::{Error, LineType, SingleReadValidator, ValidationLevel},
validators::{self, LineType, SingleReadValidator, ValidationLevel},
};

/// [S003] (high) Validator to check if the name line starts with an "@".
Expand All @@ -19,20 +21,24 @@ impl SingleReadValidator for NameValidator {
ValidationLevel::High
}

fn validate(&self, r: &Record) -> Result<(), Error> {
fn validate(&self, r: &Record) -> Result<(), validators::Error> {
match r.name().first() {
Some(b'@') => Ok(()),
_ => Err(Error::new(
_ => Err(validators::Error::new(
self.code(),
self.name(),
String::from("Does not start with an '@'"),
ValidationError,
LineType::Name,
Some(1),
)),
}
}
}

/// Error reported when a record's name line does not begin with `@`.
#[derive(Debug, Error)]
#[error("missing @ prefix")]
struct ValidationError;

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading

0 comments on commit fb9f617

Please sign in to comment.