Skip to content

Commit

Permalink
feat: add simple variantQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasKellerer committed Apr 27, 2023
1 parent 77e97bc commit 3a7a7a2
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 20 deletions.
15 changes: 15 additions & 0 deletions lapis2/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ plugins {
id 'org.jetbrains.kotlin.plugin.spring' version '1.8.20'
id 'org.jlleitschuh.gradle.ktlint' version "11.3.1"
id 'org.springdoc.openapi-gradle-plugin' version "1.6.0"
id 'antlr'
}

group = 'org.genspectrum'
Expand All @@ -27,6 +28,8 @@ dependencies {
implementation 'com.fasterxml.jackson.module:jackson-module-kotlin'
implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.1.0'
implementation 'io.github.microutils:kotlin-logging-jvm:3.0.5'
antlr 'org.antlr:antlr4:4.11.1'
implementation 'org.antlr:antlr4-runtime:4.11.1'

testImplementation('org.springframework.boot:spring-boot-starter-test') {
exclude group: "org.mockito"
Expand All @@ -35,14 +38,26 @@ dependencies {
testImplementation 'org.mock-server:mockserver-netty:5.15.0'
}


// The Kotlin sources import the lexer/parser classes generated from the ANTLR grammar,
// so grammar generation is a hard dependency of Kotlin compilation.
compileKotlin {
dependsOn generateGrammarSource
compilerOptions {
freeCompilerArgs.add("-Xexport-kdoc")
jvmTarget.set(JvmTarget.JVM_19)
}
}

// Ordering constraint only (mustRunAfter does not add a dependency): when grammar generation
// is part of the same build, it is scheduled before ktlint checks the main source set.
tasks.named('runKtlintCheckOverMainSourceSet') {
mustRunAfter("generateGrammarSource")
}

// Ordering constraint only: test compilation is scheduled after both grammar generation tasks
// whenever they are part of the same build.
tasks.named('compileTestKotlin') {
mustRunAfter("generateGrammarSource")
mustRunAfter("generateTestGrammarSource")
}

tasks.named('test') {
dependsOn generateGrammarSource
useJUnitPlatform()
testLogging {
events TestLogEvent.FAILED
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
grammar VariantQuery;

// parser rules

// Entry rule: exactly one expression followed by EOF, so the whole input has to be consumed.
start: expr EOF;
// The `# Uni` / `# And` labels give each alternative its own generated context class and
// its own enter/exit listener callbacks.
expr:
single # Uni
| expr '&' expr # And
;

// A single, non-composite query term. Nucleotide mutations are the only kind defined so far.
single:
nucleotide_mutation
;

// Optional leading symbol, mandatory position, optional trailing symbol, e.g. "300G" or "400".
nucleotide_mutation : nucleotide_symbol? position ambigous_nucleotide_symbol?;

// One or more NUMBER tokens; each NUMBER token is a single digit (see the lexer rules).
position: NUMBER+;
nucleotide_symbol: A | C | G | T;
// Everything nucleotide_symbol accepts, plus M, R, W, S, Y, K, V, H, D, B, N, '-' and '.'.
ambigous_nucleotide_symbol: nucleotide_symbol | M | R | W | S | Y | K | V | H | D | B | N | MINUS | DOT;

// lexer rules

// One token type per upper-case letter A-Z. The parser rules of this grammar reference only
// a subset of them (A, C, G, T, M, R, W, S, Y, K, V, H, D, B, N).
A: 'A';
B: 'B';
C: 'C';
D: 'D';
E: 'E';
F: 'F';
G: 'G';
H: 'H';
I: 'I';
J: 'J';
K: 'K';
L: 'L';
M: 'M';
N: 'N';
O: 'O';
P: 'P';
Q: 'Q';
R: 'R';
S: 'S';
T: 'T';
U: 'U';
V: 'V';
W: 'W';
X: 'X';
Y: 'Y';
Z: 'Z';
MINUS: '-';
DOT: '.';

// NUMBER matches exactly one digit; multi-digit positions are assembled by the parser rule `position`.
NUMBER: [0-9];
// Spaces, carriage returns, newlines and tabs are discarded and never reach the parser.
// NOTE(review): as a consequence, digits separated by whitespace still form a single `position` - confirm this is intended.
WHITESPACE: [ \r\n\t] -> skip;
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ package org.genspectrum.lapis.config

data class DatabaseConfig(val schema: DatabaseSchema)

data class DatabaseSchema(val instanceName: String, val metadata: List<DatabaseMetadata>, val primaryKey: String)
/**
 * Schema section of the database configuration.
 *
 * @property instanceName name of the configured instance
 * @property metadata the metadata columns described by [DatabaseMetadata] entries
 * @property primaryKey name of the primary key
 * @property features optional features enabled for this instance. Defaults to an empty list so that
 * configurations and constructor calls that predate the `features` entry keep working.
 */
data class DatabaseSchema(
    val instanceName: String,
    val metadata: List<DatabaseMetadata>,
    val primaryKey: String,
    val features: List<DatabaseFeature> = emptyList(),
)

/** A metadata column, identified by its [name]; its [type] is given as a plain string. */
data class DatabaseMetadata(val name: String, val type: String)

/** An optional feature, identified by its [name]. */
data class DatabaseFeature(val name: String)
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ data class SequenceFilterFields(val fields: Map<FieldName, SequenceFilterFieldTy
fields = databaseConfig.schema.metadata
.map(::mapToSequenceFilterFields)
.flatten()
.toMap() + nucleotideMutationsField,
.toMap() +
databaseConfig.schema.features
.map(::mapToSequenceFilterFieldsFromFeatures)
.flatten()
.toMap() +
nucleotideMutationsField,
)
}
}
Expand All @@ -29,11 +34,19 @@ private fun mapToSequenceFilterFields(databaseMetadata: DatabaseMetadata) = when
)
}

/**
 * Translates a configured feature into the sequence filter fields it contributes.
 *
 * The only feature known here is `sarsCoV2VariantQuery`, which contributes one field named after
 * the feature with type [SequenceFilterFieldType.VariantQuery].
 *
 * @throws IllegalArgumentException if the feature name is not known
 */
private fun mapToSequenceFilterFieldsFromFeatures(databaseFeature: DatabaseFeature) =
    when (val featureName = databaseFeature.name) {
        "sarsCoV2VariantQuery" -> listOf(Pair(featureName, SequenceFilterFieldType.VariantQuery))
        else -> throw IllegalArgumentException("Unknown feature '$featureName'")
    }

/**
 * The kinds of sequence filter fields that can appear in a request.
 *
 * [openApiType] is the value each subtype hands to this constructor; every subtype declared
 * here passes "string".
 */
sealed class SequenceFilterFieldType(val openApiType: kotlin.String) {
// NOTE(review): `kotlin.String` is written fully qualified, presumably because the nested
// object `String` below would otherwise shadow the simple name inside this class.
object String : SequenceFilterFieldType("string")
object PangoLineage : SequenceFilterFieldType("string")
object Date : SequenceFilterFieldType("string")
object MutationsList : SequenceFilterFieldType("string")
// Field type assigned to the `sarsCoV2VariantQuery` feature.
object VariantQuery : SequenceFilterFieldType("string")
// DateFrom / DateTo additionally carry the name of the field they are associated with.
data class DateFrom(val associatedField: kotlin.String) : SequenceFilterFieldType("string")
data class DateTo(val associatedField: kotlin.String) : SequenceFilterFieldType("string")
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,44 +15,57 @@ import java.time.format.DateTimeParseException

data class SequenceFilterValue(val type: SequenceFilterFieldType, val value: String, val originalKey: String)

typealias SequenceFilterFieldName = String

@Component
class SiloFilterExpressionMapper(private val allowedSequenceFilterFields: SequenceFilterFields) {
fun map(sequenceFilters: Map<String, String>): SiloFilterExpression {
if (sequenceFilters.isEmpty()) {
return True
}

val filterExpressions = sequenceFilters
val allowedSequenceFiltersWithType = sequenceFilters
.map { (key, value) ->
val nullableType = allowedSequenceFilterFields.fields[key]
val (filterExpressionId, type) = mapToFilterExpressionIdentifier(nullableType, key)
filterExpressionId to SequenceFilterValue(type, value, key)
}
.groupBy({ it.first }, { it.second })
.map { (key, values) ->
val (siloColumnName, siloFilterPrimitive) = key
when (siloFilterPrimitive) {
SiloFilterPrimitive.StringEquals -> StringEquals(siloColumnName, values[0].value)
SiloFilterPrimitive.PangoLineage -> mapToPangoLineageFilter(siloColumnName, values[0].value)
SiloFilterPrimitive.DateBetween -> mapToDateBetweenFilter(siloColumnName, values)
SiloFilterPrimitive.NucleotideSymbolEquals -> mapToNucleotideFilter(values[0].value)
}

if (allowedSequenceFiltersWithType.keys.any { it.second == Filter.VariantQuery } &&
allowedSequenceFiltersWithType.keys.any { it.second in variantQueryTypes }
) {
throw IllegalArgumentException(
"variantQuery cannot be used with other variant filters",
)
}

val filterExpressions = allowedSequenceFiltersWithType.map { (key, values) ->
val (siloColumnName, filter) = key
when (filter) {
Filter.StringEquals -> StringEquals(siloColumnName, values[0].value)
Filter.PangoLineage -> mapToPangoLineageFilter(siloColumnName, values[0].value)
Filter.DateBetween -> mapToDateBetweenFilter(siloColumnName, values)
Filter.NucleotideSymbolEquals -> mapToNucleotideFilter(values[0].value)
Filter.VariantQuery -> mapToVariantQueryFilter(values[0].value)
}
}

return And(filterExpressions)
}

private fun mapToFilterExpressionIdentifier(
type: SequenceFilterFieldType?,
key: String,
): Pair<Pair<String, SiloFilterPrimitive>, SequenceFilterFieldType> {
key: SequenceFilterFieldName,
): Pair<Pair<SequenceFilterFieldName, Filter>, SequenceFilterFieldType> {
val filterExpressionId = when (type) {
is SequenceFilterFieldType.DateFrom -> Pair(type.associatedField, SiloFilterPrimitive.DateBetween)
is SequenceFilterFieldType.DateTo -> Pair(type.associatedField, SiloFilterPrimitive.DateBetween)
SequenceFilterFieldType.Date -> Pair(key, SiloFilterPrimitive.DateBetween)
SequenceFilterFieldType.PangoLineage -> Pair(key, SiloFilterPrimitive.PangoLineage)
SequenceFilterFieldType.String -> Pair(key, SiloFilterPrimitive.StringEquals)
SequenceFilterFieldType.MutationsList -> Pair(key, SiloFilterPrimitive.NucleotideSymbolEquals)
is SequenceFilterFieldType.DateFrom -> Pair(type.associatedField, Filter.DateBetween)
is SequenceFilterFieldType.DateTo -> Pair(type.associatedField, Filter.DateBetween)
SequenceFilterFieldType.Date -> Pair(key, Filter.DateBetween)
SequenceFilterFieldType.PangoLineage -> Pair(key, Filter.PangoLineage)
SequenceFilterFieldType.String -> Pair(key, Filter.StringEquals)
SequenceFilterFieldType.MutationsList -> Pair(key, Filter.NucleotideSymbolEquals)
SequenceFilterFieldType.VariantQuery -> Pair(key, Filter.VariantQuery)

null -> throw IllegalArgumentException(
"'$key' is not a valid sequence filter key. Valid keys are: " +
Expand All @@ -62,6 +75,16 @@ class SiloFilterExpressionMapper(private val allowedSequenceFilterFields: Sequen
return Pair(filterExpressionId, type)
}

/**
 * Maps the raw value of a variant query filter to a [SiloFilterExpression] by delegating
 * to [VariantQueryFacade].
 *
 * @throws IllegalArgumentException if [variantQuery] is empty or consists only of whitespace
 */
private fun mapToVariantQueryFilter(variantQuery: String): SiloFilterExpression {
    require(variantQuery.isNotBlank()) { "variantQuery cannot be empty" }

    val facade = VariantQueryFacade()
    return facade.map(variantQuery)
}

private fun mapToDateBetweenFilter(
siloColumnName: String,
values: List<SequenceFilterValue>,
Expand Down Expand Up @@ -155,10 +178,13 @@ class SiloFilterExpressionMapper(private val allowedSequenceFilterFields: Sequen
}
}

private enum class SiloFilterPrimitive {
private enum class Filter {
StringEquals,
PangoLineage,
DateBetween,
NucleotideSymbolEquals,
VariantQuery,
}

private val variantQueryTypes = listOf(Filter.PangoLineage, Filter.NucleotideSymbolEquals)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.genspectrum.lapis.model

import VariantQueryBaseListener
import VariantQueryParser.AndContext
import VariantQueryParser.Nucleotide_mutationContext
import org.antlr.v4.runtime.tree.ParseTreeListener
import org.genspectrum.lapis.silo.And
import org.genspectrum.lapis.silo.NucleotideSymbolEquals
import org.genspectrum.lapis.silo.SiloFilterExpression

/**
 * Parse tree listener that builds a [SiloFilterExpression] while a variant query parse tree is walked.
 *
 * Every nucleotide mutation pushes one expression onto an internal stack; every `And` node pops
 * its two operands and pushes the combined expression. [getExpr] returns the first element of
 * that stack.
 */
class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener {
    private val expressionStack = ArrayDeque<SiloFilterExpression>()

    /**
     * Returns the first expression on the stack.
     *
     * @throws NoSuchElementException if no expression has been pushed
     */
    fun getExpr(): SiloFilterExpression = expressionStack.first()

    /**
     * Pushes a [NucleotideSymbolEquals] for the mutation. When the query gives no trailing symbol,
     * "-" is used.
     *
     * @throws IllegalArgumentException if the position does not fit into an [Int]
     */
    override fun enterNucleotide_mutation(ctx: Nucleotide_mutationContext?) {
        ctx ?: return

        val positionText = ctx.position().text
        // The grammar accepts arbitrarily many digits, so the conversion can fail on overflow.
        // Report that with a readable message instead of a bare NumberFormatException.
        val position = positionText.toIntOrNull()
            ?: throw IllegalArgumentException("'$positionText' is not a valid nucleotide position")
        val symbol = ctx.ambigous_nucleotide_symbol()?.text ?: "-"

        expressionStack.addLast(NucleotideSymbolEquals(position, symbol))
    }

    /** Replaces the two most recently pushed expressions by their conjunction, keeping query order. */
    override fun exitAnd(ctx: AndContext?) {
        // Operands were pushed left to right, so the right-hand side is on top of the stack.
        val right = expressionStack.removeLast()
        val left = expressionStack.removeLast()
        expressionStack.addLast(And(listOf(left, right)))
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package org.genspectrum.lapis.model

import VariantQueryLexer
import VariantQueryParser
import org.antlr.v4.runtime.BaseErrorListener
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.RecognitionException
import org.antlr.v4.runtime.Recognizer
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.genspectrum.lapis.silo.SiloFilterExpression

/**
 * Entry point for turning a variant query string into a [SiloFilterExpression].
 */
class VariantQueryFacade {

    /**
     * Lexes and parses [variantQuery] with the generated grammar classes, walks the parse tree
     * with a [VariantQueryCustomListener] and returns the expression that listener built.
     *
     * @throws IllegalArgumentException if the query contains a lexical or syntax error
     */
    fun map(variantQuery: String): SiloFilterExpression {
        // ANTLR's default listeners only print to stderr and then try to recover, which would let
        // a malformed query produce a partial expression. Replace them with a failing listener.
        val lexer = VariantQueryLexer(CharStreams.fromString(variantQuery)).apply {
            removeErrorListeners()
            addErrorListener(ThrowingErrorListener)
        }
        val parser = VariantQueryParser(CommonTokenStream(lexer)).apply {
            removeErrorListeners()
            addErrorListener(ThrowingErrorListener)
        }
        val listener = VariantQueryCustomListener()

        ParseTreeWalker().walk(listener, parser.start())

        return listener.getExpr()
    }
}

/** Error listener that turns every reported syntax error into an [IllegalArgumentException]. */
private object ThrowingErrorListener : BaseErrorListener() {
    override fun syntaxError(
        recognizer: Recognizer<*, *>?,
        offendingSymbol: Any?,
        line: Int,
        charPositionInLine: Int,
        msg: String?,
        e: RecognitionException?,
    ) {
        throw IllegalArgumentException(
            "Failed to parse variant query (line $line:$charPositionInLine): $msg",
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package org.genspectrum.lapis.model

import org.genspectrum.lapis.silo.And
import org.genspectrum.lapis.silo.NucleotideSymbolEquals
import org.hamcrest.MatcherAssert
import org.hamcrest.Matchers
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test

/**
 * Tests for [VariantQueryFacade.map]: single mutations and `&` combinations of mutations.
 */
class VariantQueryFacadeTest {
    private lateinit var underTest: VariantQueryFacade

    @BeforeEach
    fun setup() {
        underTest = VariantQueryFacade()
    }

    @Test
    fun `given a variant query with a single entry then map should return the corresponding SiloQuery`() {
        val variantQuery = "300G"

        val result = underTest.map(variantQuery)

        val expectedResult = NucleotideSymbolEquals(300, "G")
        MatcherAssert.assertThat(result, Matchers.equalTo(expectedResult))
    }

    @Test
    fun `given a variant query with an and expression then map should return the corresponding SiloQuery`() {
        // "400" carries no symbol; the listener substitutes "-" in that case.
        val variantQuery = "300G & 400"

        val result = underTest.map(variantQuery)

        val expectedResult = And(
            listOf(
                NucleotideSymbolEquals(300, "G"),
                NucleotideSymbolEquals(400, "-"),
            ),
        )
        MatcherAssert.assertThat(result, Matchers.equalTo(expectedResult))
    }

    @Test
    fun `given a variant query with two and expressions then map should return the corresponding SiloQuery`() {
        val variantQuery = "300G & 400- & 500B"

        val result = underTest.map(variantQuery)

        // The expected nesting pins '&' as left-associative: (300G & 400-) & 500B.
        val expectedResult = And(
            listOf(
                And(
                    listOf(
                        NucleotideSymbolEquals(300, "G"),
                        NucleotideSymbolEquals(400, "-"),
                    ),
                ),
                NucleotideSymbolEquals(500, "B"),
            ),
        )
        MatcherAssert.assertThat(result, Matchers.equalTo(expectedResult))
    }
}
2 changes: 2 additions & 0 deletions lapis2/src/test/resources/config/testDatabaseConfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ schema:
type: string
- name: pangoLineage
type: pango_lineage
# Each entry is read into DatabaseFeature, whose only property is `name`.
features:
- name: sarsCoV2VariantQuery
primaryKey: gisaid_epi_isl

0 comments on commit 3a7a7a2

Please sign in to comment.