Skip to content

Commit

Permalink
Merge pull request #211 from VEuPathDB/issue-202
Browse files Browse the repository at this point in the history
Validate dash count on first line of each sequence.
  • Loading branch information
Foxcapades authored Dec 14, 2022
2 parents 708603d + 687e7b1 commit 32df5fd
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,9 @@ class SequenceLengthValidationError(
/**
 * Validation error reported when a query contains no sequences at all.
 */
class SequenceEmptyValidationError : SequenceValidationError {
  /** Fixed, human-readable description of the failure. */
  override val message: String = "Empty query. Query must have 1 or more sequences."
}

/**
 * Validation error reported when the first line of an input sequence
 * contains too many dash (`-`) characters.
 *
 * @param sequence Number of the offending sequence, as it is reported in the
 * error message.
 */
class SequenceDashesValidationError(val sequence: Int) : SequenceValidationError {
  /** Human-readable description of the failure, naming the sequence number. */
  override val message: String = "The first line of input sequence #$sequence contained too many dashes."
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package mb.api.service.valid

import org.veupathdb.lib.blast.BlastTool
import java.util.*
import kotlin.math.min

interface SequenceValidator {
/**
Expand Down Expand Up @@ -32,6 +33,8 @@ interface SequenceValidator {
* @return A [SequenceValidationError] describing the problem found in the input, or `null` if the input passed validation.
*/
fun validate(sequence: Int, seq: String): SequenceValidationError? {
validateFirstLine(sequence, seq)?.let { return it }

// Input scanner
val scan = Scanner(seq)
// Current line number
Expand Down Expand Up @@ -65,6 +68,54 @@ interface SequenceValidator {
return null
}

/**
 * Validates the first data line in a sequence to ensure that it passes the
 * first line check performed by the BLAST+ CLI tool:
 * ```
 * if (bad >= good / 3 &&
 *     (len_to_check > 3 || good == 0 || bad > good))
 * {
 *     FASTA_ERROR( LineNumber(),
 *         "CFastaReader: Near line " << LineNumber()
 *         << ", there's a line that doesn't look like plausible data, "
 *         "but it's not marked as defline or comment.",
 *         CObjReaderParseException::eFormat);
 * }
 * ```
 *
 * The line examined is the first line that is neither blank nor starts with
 * `>`. At most the first 70 characters of that line are inspected; dashes
 * take the role of `bad` and every other character the role of `good` in the
 * check above.
 *
 * @param sequence Number of the sequence being validated; passed through to
 * the returned [SequenceDashesValidationError].
 *
 * @param seq Raw text of the sequence to validate.
 *
 * @return A [SequenceDashesValidationError] if the examined line fails the
 * dash check, a [SequenceEmptyValidationError] if the input contains no line
 * to examine, or `null` if the examined line passes.
 */
private fun validateFirstLine(sequence: Int, seq: String): SequenceValidationError? {
  // Maximum number of leading characters of the line that are inspected.
  val maxChecked = 70

  val scan = Scanner(seq)

  while (scan.hasNextLine()) {
    val line = scan.nextLine()

    // Skip blank lines and lines starting with '>'; only the first remaining
    // line is subject to the check.
    if (line.isBlank() || line[0] == '>')
      continue

    // Number of characters actually inspected.  The loop bound below is
    // exclusive: indexing up to and including `checked` would read past the
    // end of any line of `maxChecked` characters or fewer.
    val checked = min(line.length, maxChecked)

    var dashes = 0
    for (i in 0 until checked) {
      if (line[i] == '-')
        dashes++
    }

    val nonDashes = checked - dashes

    return if (
      dashes >= nonDashes / 3
      && (checked > 3 || nonDashes == 0 || dashes > nonDashes)
    )
      SequenceDashesValidationError(sequence)
    else
      null
  }

  // No line other than blank lines or '>' lines was found.
  return SequenceEmptyValidationError()
}

companion object {
fun getValidator(tool: BlastTool): SequenceValidator {
return when (tool) {
Expand Down

0 comments on commit 32df5fd

Please sign in to comment.