From 687e7b1f21b5492777cc590d65fae92a6d188dc1 Mon Sep 17 00:00:00 2001
From: Elizabeth
Date: Tue, 13 Dec 2022 14:21:49 -0500
Subject: [PATCH] first line validation

---
 .../service/valid/SequenceValidationError.kt  |  5 ++
 .../mb/api/service/valid/SequenceValidator.kt | 51 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidationError.kt b/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidationError.kt
index 8f0fb4ff8..f2ac69a98 100644
--- a/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidationError.kt
+++ b/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidationError.kt
@@ -25,4 +25,9 @@ class SequenceLengthValidationError(
 class SequenceEmptyValidationError : SequenceValidationError {
   override val message: String
     get() = "Empty query. Query must have 1 or more sequences."
+}
+
+class SequenceDashesValidationError(val sequence: Int) : SequenceValidationError {
+  override val message: String
+    get() = "The first line of input sequence #$sequence contained too many dashes."
 }
\ No newline at end of file
diff --git a/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidator.kt b/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidator.kt
index 5890ef187..7789c7606 100644
--- a/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidator.kt
+++ b/rest-service/src/main/kotlin/mb/api/service/valid/SequenceValidator.kt
@@ -2,6 +2,7 @@ package mb.api.service.valid
 
 import org.veupathdb.lib.blast.BlastTool
 import java.util.*
+import kotlin.math.min
 
 interface SequenceValidator {
   /**
@@ -32,6 +33,8 @@ interface SequenceValidator {
    * @return Whether all characters in the input {@code CharSequence} are valid.
    */
   fun validate(sequence: Int, seq: String): SequenceValidationError? {
+    validateFirstLine(sequence, seq)?.let { return it }
+
     // Input scanner
     val scan = Scanner(seq)
     // Current line number
@@ -65,6 +68,54 @@ interface SequenceValidator {
     return null
   }
 
+  /**
+   * Validates the first line in a sequence to ensure that it passes the first
+   * line check performed by the BLAST+ CLI tool:
+   * ```
+   * if (bad >= good / 3 &&
+   *     (len_to_check > 3 || good == 0 || bad > good))
+   * {
+   *     FASTA_ERROR( LineNumber(),
+   *         "CFastaReader: Near line " << LineNumber()
+   *         << ", there's a line that doesn't look like plausible data, "
+   *         "but it's not marked as defline or comment.",
+   *         CObjReaderParseException::eFormat);
+   * }
+   * ```
+   */
+  private fun validateFirstLine(sequence: Int, seq: String): SequenceValidationError? {
+    val scan = Scanner(seq)
+
+    while (scan.hasNextLine()) {
+      val line = scan.nextLine()
+
+      if (line.isNotBlank() && line[0] != '>') {
+        var dashes = 0
+        var nonDashes = 0
+
+        // Count dashes vs. non-dashes in at most the first 70 characters
+        for (i in 0 until min(line.length, 70)) {
+          when (line[i]) {
+            '-' -> dashes++
+            else -> nonDashes++
+          }
+        }
+
+        val checked = dashes+nonDashes
+
+        return if (
+          dashes >= nonDashes / 3
+          && (checked > 3 || nonDashes == 0 || dashes > nonDashes)
+        )
+          SequenceDashesValidationError(sequence)
+        else
+          null
+      }
+    }
+
+    return SequenceEmptyValidationError()
+  }
+
   companion object {
     fun getValidator(tool: BlastTool): SequenceValidator {
       return when (tool) {