Skip to content

Commit

Permalink
feat: allow lower case for segments and genes
Browse files Browse the repository at this point in the history
- in amino acid insertions
- in amino acid mutations
- in nucleotide insertions
- in nucleotide mutations
  • Loading branch information
JonasKellerer committed Dec 19, 2023
1 parent 5434e49 commit 74b41bb
Show file tree
Hide file tree
Showing 12 changed files with 185 additions and 116 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.genspectrum.lapis.config
import com.fasterxml.jackson.annotation.JsonIgnoreProperties
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import org.genspectrum.lapis.controller.BadRequestException
import java.io.File

const val REFERENCE_GENOME_SEGMENTS_APPLICATION_ARG_PREFIX = "referenceGenome.segments"
Expand All @@ -13,12 +14,19 @@ private const val ARGS_NAME = "referenceGenomeFilename"

@JsonIgnoreProperties(ignoreUnknown = true)
class ReferenceGenome(val nucleotideSequences: List<ReferenceSequence>, val genes: List<ReferenceSequence>) {
// Lookup tables keyed by the lower-cased sequence name.
// With associateBy, if two names differ only by case, the later entry replaces the earlier one.
private val nucleotideSequenceNames: Map<LowercaseName, ReferenceSequence> =
    nucleotideSequences.associateBy { sequence -> sequence.name.lowercase() }
private val geneNames: Map<LowercaseName, ReferenceSequence> =
    genes.associateBy { gene -> gene.name.lowercase() }

/**
 * Looks up the nucleotide sequence registered under [lowercaseName].
 *
 * @param lowercaseName the lookup key; the underlying map is keyed by lower-cased sequence names.
 * @return the matching [ReferenceSequence].
 * @throws BadRequestException if no nucleotide sequence is registered under that key.
 */
fun getNucleotideSequenceFromLowercaseName(lowercaseName: LowercaseName): ReferenceSequence =
    nucleotideSequenceNames[lowercaseName]
        ?: throw BadRequestException("Unknown nucleotide sequence from lower case: $lowercaseName")

/**
 * Looks up the gene registered under [lowercaseName].
 *
 * @param lowercaseName the lookup key; the underlying map is keyed by lower-cased gene names.
 * @return the matching [ReferenceSequence].
 * @throws BadRequestException if no gene is registered under that key, matching the behavior of
 * [getNucleotideSequenceFromLowercaseName].
 */
fun getGeneFromLowercaseName(lowercaseName: LowercaseName): ReferenceSequence {
    // NOTE(review): presumably BadRequestException is a RuntimeException subtype, so callers that
    // caught the former generic RuntimeException keep working - confirm against its declaration.
    return geneNames[lowercaseName]
        ?: throw BadRequestException("Unknown gene from lower case: $lowercaseName")
}

fun isSingleSegmented(): Boolean {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ package org.genspectrum.lapis.request
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import org.genspectrum.lapis.config.ReferenceGenome
import org.genspectrum.lapis.controller.BadRequestException
import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
import org.springframework.stereotype.Component

data class AminoAcidInsertion(val position: Int, val gene: String, val insertions: String) {
companion object {
fun fromString(aminoAcidInsertion: String): AminoAcidInsertion {
fun fromString(
aminoAcidInsertion: String,
referenceGenome: ReferenceGenome,
): AminoAcidInsertion {
val match = AMINO_ACID_INSERTION_REGEX.find(aminoAcidInsertion)
?: throw BadRequestException("Invalid nucleotide mutation: $aminoAcidInsertion")

Expand All @@ -21,10 +25,11 @@ data class AminoAcidInsertion(val position: Int, val gene: String, val insertion
"Invalid amino acid insertion: $aminoAcidInsertion: Did not find position",
)

val gene = matchGroups["gene"]?.value
val geneLowerCase = matchGroups["gene"]?.value?.lowercase()
?: throw BadRequestException(
"Invalid amino acid insertion: $aminoAcidInsertion: Did not find gene",
)
val geneName = referenceGenome.getGeneFromLowercaseName(geneLowerCase).name

val insertions = matchGroups["insertions"]?.value?.replace(
LAPIS_INSERTION_AMBIGUITY_SYMBOL,
Expand All @@ -36,7 +41,7 @@ data class AminoAcidInsertion(val position: Int, val gene: String, val insertion

return AminoAcidInsertion(
position,
gene,
geneName,
insertions,
)
}
Expand All @@ -49,14 +54,18 @@ private val AMINO_ACID_INSERTION_REGEX =
)

/**
 * Jackson deserializer that turns a JSON string value into an [AminoAcidInsertion].
 *
 * The injected [referenceGenome] is passed on to [AminoAcidInsertion.fromString], which uses it to
 * resolve the gene name.
 */
@JsonComponent
class AminoAcidInsertionDeserializer(
    private val referenceGenome: ReferenceGenome,
) : JsonDeserializer<AminoAcidInsertion>() {
    override fun deserialize(
        p: JsonParser,
        ctxt: DeserializationContext,
    ) = AminoAcidInsertion.fromString(p.valueAsString, referenceGenome)
}

/**
 * Spring [Converter] that parses a string into an [AminoAcidInsertion].
 *
 * The injected [referenceGenome] is passed on to [AminoAcidInsertion.fromString], which uses it to
 * resolve the gene name.
 */
@Component
class StringToAminoAcidInsertionConverter(
    private val referenceGenome: ReferenceGenome,
) : Converter<String, AminoAcidInsertion> {
    override fun convert(source: String) = AminoAcidInsertion.fromString(source, referenceGenome)
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,34 @@ package org.genspectrum.lapis.request
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import org.genspectrum.lapis.config.ReferenceGenome
import org.genspectrum.lapis.controller.BadRequestException
import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
import org.springframework.stereotype.Component

data class AminoAcidMutation(val gene: String, val position: Int, val symbol: String?) {
companion object {
fun fromString(aminoAcidMutation: String): AminoAcidMutation {
fun fromString(
aminoAcidMutation: String,
referenceGenome: ReferenceGenome,
): AminoAcidMutation {
val match = AMINO_ACID_MUTATION_REGEX.find(aminoAcidMutation)
?: throw BadRequestException("Invalid amino acid mutation: $aminoAcidMutation")

val matchGroups = match.groups

val gene = matchGroups["gene"]?.value
val geneLowerCase = matchGroups["gene"]?.value?.lowercase()
?: throw BadRequestException("Invalid amino acid mutation: $aminoAcidMutation: Did not find gene")
val geneName = referenceGenome.getGeneFromLowercaseName(geneLowerCase).name

val position = matchGroups["position"]?.value?.toInt()
?: throw BadRequestException(
"Invalid amino acid mutation: $aminoAcidMutation: Did not find position",
)

return AminoAcidMutation(
gene,
geneName,
position,
matchGroups["symbolTo"]?.value?.uppercase(),
)
Expand All @@ -38,14 +44,18 @@ private val AMINO_ACID_MUTATION_REGEX =
)

/**
 * Jackson deserializer that turns a JSON string value into an [AminoAcidMutation].
 *
 * The injected [referenceGenome] is passed on to [AminoAcidMutation.fromString], which uses it to
 * resolve the gene name.
 */
@JsonComponent
class AminoAcidMutationDeserializer(
    private val referenceGenome: ReferenceGenome,
) : JsonDeserializer<AminoAcidMutation>() {
    override fun deserialize(
        p: JsonParser,
        ctxt: DeserializationContext,
    ) = AminoAcidMutation.fromString(p.valueAsString, referenceGenome)
}

/**
 * Spring [Converter] that parses a string into an [AminoAcidMutation].
 *
 * The injected [referenceGenome] is passed on to [AminoAcidMutation.fromString], which uses it to
 * resolve the gene name.
 */
@Component
class StringToAminoAcidMutationConverter(
    private val referenceGenome: ReferenceGenome,
) : Converter<String, AminoAcidMutation> {
    override fun convert(source: String) = AminoAcidMutation.fromString(source, referenceGenome)
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.genspectrum.lapis.request
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import org.genspectrum.lapis.config.ReferenceGenome
import org.genspectrum.lapis.controller.BadRequestException
import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
Expand All @@ -13,7 +14,10 @@ const val SILO_INSERTION_AMBIGUITY_SYMBOL = ".*"

data class NucleotideInsertion(val position: Int, val insertions: String, val segment: String?) {
companion object {
fun fromString(nucleotideInsertion: String): NucleotideInsertion {
fun fromString(
nucleotideInsertion: String,
referenceGenome: ReferenceGenome,
): NucleotideInsertion {
val match = NUCLEOTIDE_INSERTION_REGEX.find(nucleotideInsertion)
?: throw BadRequestException("Invalid nucleotide mutation: $nucleotideInsertion")

Expand All @@ -32,10 +36,13 @@ data class NucleotideInsertion(val position: Int, val insertions: String, val se
"Invalid nucleotide insertion: $nucleotideInsertion: Did not find insertions",
)

val segmentName = matchGroups["segment"]?.value?.lowercase()
?.let { referenceGenome.getNucleotideSequenceFromLowercaseName(it).name }

return NucleotideInsertion(
position,
insertions,
matchGroups["segment"]?.value,
segmentName,
)
}
}
Expand All @@ -47,14 +54,17 @@ private val NUCLEOTIDE_INSERTION_REGEX =
)

/**
 * Jackson deserializer that turns a JSON string value into a [NucleotideInsertion].
 *
 * The injected [referenceGenome] is passed on to [NucleotideInsertion.fromString], which uses it to
 * resolve the segment name when one is given.
 */
@JsonComponent
class NucleotideInsertionDeserializer(private val referenceGenome: ReferenceGenome) :
    JsonDeserializer<NucleotideInsertion>() {
    override fun deserialize(
        p: JsonParser,
        ctxt: DeserializationContext,
    ) = NucleotideInsertion.fromString(p.valueAsString, referenceGenome)
}

/**
 * Spring [Converter] that parses a string into a [NucleotideInsertion].
 *
 * The injected [referenceGenome] is passed on to [NucleotideInsertion.fromString], which uses it to
 * resolve the segment name when one is given.
 */
@Component
class StringToNucleotideInsertionConverter(
    private val referenceGenome: ReferenceGenome,
) : Converter<String, NucleotideInsertion> {
    override fun convert(source: String) = NucleotideInsertion.fromString(source, referenceGenome)
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ package org.genspectrum.lapis.request
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import org.genspectrum.lapis.config.ReferenceGenome
import org.genspectrum.lapis.controller.BadRequestException
import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
import org.springframework.stereotype.Component

data class NucleotideMutation(val sequenceName: String?, val position: Int, val symbol: String?) {
companion object {
fun fromString(nucleotideMutation: String): NucleotideMutation {
fun fromString(
nucleotideMutation: String,
referenceGenome: ReferenceGenome,
): NucleotideMutation {
val match = NUCLEOTIDE_MUTATION_REGEX.find(nucleotideMutation)
?: throw BadRequestException("Invalid nucleotide mutation: $nucleotideMutation")

Expand All @@ -21,8 +25,11 @@ data class NucleotideMutation(val sequenceName: String?, val position: Int, val
"Invalid nucleotide mutation: $nucleotideMutation: Did not find position",
)

val segmentName = matchGroups["sequenceName"]?.value?.lowercase()
?.let { referenceGenome.getNucleotideSequenceFromLowercaseName(it).name }

return NucleotideMutation(
matchGroups["sequenceName"]?.value,
segmentName,
position,
matchGroups["symbolTo"]?.value?.uppercase(),
)
Expand All @@ -37,14 +44,17 @@ private val NUCLEOTIDE_MUTATION_REGEX =
)

/**
 * Jackson deserializer that turns a JSON string value into a [NucleotideMutation].
 *
 * The injected [referenceGenome] is passed on to [NucleotideMutation.fromString], which uses it to
 * resolve the sequence name when one is given.
 */
@JsonComponent
class NucleotideMutationDeserializer(
    private val referenceGenome: ReferenceGenome,
) : JsonDeserializer<NucleotideMutation>() {
    override fun deserialize(
        p: JsonParser,
        ctxt: DeserializationContext,
    ) = NucleotideMutation.fromString(p.valueAsString, referenceGenome)
}

/**
 * Spring [Converter] that parses a string into a [NucleotideMutation].
 *
 * The injected [referenceGenome] is passed on to [NucleotideMutation.fromString], which uses it to
 * resolve the sequence name when one is given.
 */
@Component
class StringToNucleotideMutationConverter(private val referenceGenome: ReferenceGenome) :
    Converter<String, NucleotideMutation> {
    override fun convert(source: String) = NucleotideMutation.fromString(source, referenceGenome)
}
Original file line number Diff line number Diff line change
Expand Up @@ -320,14 +320,14 @@ class LapisControllerCommonFieldsTest(
emptyMap(),
emptyList(),
emptyList(),
listOf(NucleotideInsertion(123, "ABC", null), NucleotideInsertion(124, "DEF", "segment")),
listOf(NucleotideInsertion(123, "ABC", null), NucleotideInsertion(124, "DEF", "other_segment")),
emptyList(),
emptyList(),
),
)
} returns listOf(AggregationData(5, emptyMap()))

mockMvc.perform(getSample("$AGGREGATED_ROUTE?nucleotideInsertions=ins_123:ABC,ins_segment:124:DEF"))
mockMvc.perform(getSample("$AGGREGATED_ROUTE?nucleotideInsertions=ins_123:ABC,ins_other_segment:124:DEF"))
.andExpect(status().isOk)
.andExpect(jsonPath("\$.data[0].count").value(5))
}
Expand All @@ -341,13 +341,13 @@ class LapisControllerCommonFieldsTest(
emptyList(),
emptyList(),
emptyList(),
listOf(AminoAcidInsertion(123, "S", "ABC"), AminoAcidInsertion(124, "ORF1", "DEF")),
listOf(AminoAcidInsertion(123, "gene1", "ABC"), AminoAcidInsertion(124, "gene2", "DEF")),
emptyList(),
),
)
} returns listOf(AggregationData(5, emptyMap()))

mockMvc.perform(getSample("$AGGREGATED_ROUTE?aminoAcidInsertions=ins_S:123:ABC,ins_ORF1:124:DEF"))
mockMvc.perform(getSample("$AGGREGATED_ROUTE?aminoAcidInsertions=ins_gene1:123:ABC,ins_gene2:124:DEF"))
.andExpect(status().isOk)
.andExpect(jsonPath("\$.data[0].count").value(5))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,48 +41,52 @@ class AminoAcidInsertionTest {
/**
 * Valid amino acid insertion inputs (as JSON string literals), each paired with the
 * [AminoAcidInsertion] it is expected to parse to.
 *
 * Assumes the reference genome used by this test contains a gene named "gene1" - TODO confirm
 * against the test configuration. The last case checks that a differently-cased gene name
 * ("gEne1") is expected to yield "gene1".
 */
fun getAminoAcidInsertionWithValidSyntax() =
    listOf(
        Arguments.of(
            "\"ins_gene1:123:ABCD\"",
            AminoAcidInsertion(123, "gene1", "ABCD"),
        ),
        Arguments.of(
            "\"ins_gene1:123:A\"",
            AminoAcidInsertion(123, "gene1", "A"),
        ),
        Arguments.of(
            "\"ins_gene1:123:AB?CD\"",
            AminoAcidInsertion(123, "gene1", "AB.*CD"),
        ),
        Arguments.of(
            "\"ins_gene1:123:???\"",
            AminoAcidInsertion(123, "gene1", ".*.*.*"),
        ),
        Arguments.of(
            "\"ins_gene1:123:?\"",
            AminoAcidInsertion(123, "gene1", ".*"),
        ),
        Arguments.of(
            "\"ins_gene1:123:.*CD\"",
            AminoAcidInsertion(123, "gene1", ".*CD"),
        ),
        Arguments.of(
            "\"ins_gene1:123:AB.*.*\"",
            AminoAcidInsertion(123, "gene1", "AB.*.*"),
        ),
        Arguments.of(
            "\"ins_gene1:123:?CD\"",
            AminoAcidInsertion(123, "gene1", ".*CD"),
        ),
        Arguments.of(
            "\"ins_gene1:123:AB??\"",
            AminoAcidInsertion(123, "gene1", "AB.*.*"),
        ),
        Arguments.of(
            "\"ins_gene1:123:AB.*?CD\"",
            AminoAcidInsertion(123, "gene1", "AB.*.*CD"),
        ),
        Arguments.of(
            "\"ins_gene1:123:abCd\"",
            AminoAcidInsertion(123, "gene1", "ABCD"),
        ),
        Arguments.of(
            "\"ins_gEne1:123:ABCD\"",
            AminoAcidInsertion(123, "gene1", "ABCD"),
        ),
    )

Expand All @@ -91,10 +95,11 @@ class AminoAcidInsertionTest {
listOf(
Arguments.of("\"ins_::123:G\""),
Arguments.of("\"ins_:123:\""),
Arguments.of("\"ins_gene:123:\""),
Arguments.of("\"ins_gene:gene:123:ABC\""),
Arguments.of("\"ins_gene1:123:\""),
Arguments.of("\"ins_gene1:gene:123:ABC\""),
Arguments.of("\"ins_123:ABCD\""),
Arguments.of("\"ins_gene\$name&with/invalid)chars:123:A\""),
Arguments.of("\"ins_gene1\$name&with/invalid)chars:123:A\""),
Arguments.of("\"ins_notInReferenceGenome:123:ABCD\""),
)
}
}
Loading

0 comments on commit 74b41bb

Please sign in to comment.