From b4ce36137fd4059ade99afaae49efeee623cfa44 Mon Sep 17 00:00:00 2001 From: Jonas Kellerer Date: Thu, 13 Apr 2023 17:17:00 +0200 Subject: [PATCH] feat: allow upper and lowercase for queries --- .../model/variantqueryparser/VariantQuery.g4 | 37 +++++++++++-------- .../lapis/model/VariantQueryCustomListener.kt | 18 ++++----- .../lapis/model/VariantQueryFacadeTest.kt | 2 +- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 b/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 index 5bc98090..f10fd6ea 100644 --- a/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 +++ b/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 @@ -13,21 +13,25 @@ expr: ; single: - nucleotide_mutation + nucleotide_mutation_query | pangolineage_query | n_of_query - | nucleotide_insertion - | aa_mutation - | aa_insertion + | nucleotide_insertion_query + | aa_mutation_query + | aa_insertion_query | nextclade_pangolineage_query | nextstrain_clade_lineage_query | gisaid_clade_lineage_query ; -nucleotide_mutation : nucleotide_symbol? position ambigous_nucleotide_symbol?; +nucleotide_mutation_query : nucleotide_mutation_query_first_symbol? position nucleotide_mutation_query_second_symbol?; +nucleotide_mutation_query_first_symbol: nucleotide_symbol; +nucleotide_mutation_query_second_symbol: possible_ambigous_nucleotide_symbol; position: NUMBER+; nucleotide_symbol: A | C | G | T; -ambigous_nucleotide_symbol: nucleotide_symbol | M | R | W | S | Y | K | V | H | D | B | N | MINUS | DOT; +ambigous_nucleotide_symbol: M | R | W | S | Y | K | V | H | D | B | N | MINUS | DOT; +possible_ambigous_nucleotide_symbol: nucleotide_symbol | ambigous_nucleotide_symbol; + pangolineage_query: pangolineage pangolineage_include_sublineages?; pangolineage: pangolineage_character pangolineage_character? pangolineage_character? pangolineage_number_component*; @@ -35,31 +39,34 @@ pangolineage_character: A | B | C | D | E | F | G | H | I | J | K | L | M | N | pangolineage_number_component: '.' NUMBER NUMBER? NUMBER?; pangolineage_include_sublineages: DOT? ASTERISK; -n_of_query: '[' n_of_match_exactly? n_of_number_of_matchers '-of:' n_of_exprs ']'; -n_of_match_exactly: 'EXACTLY-'; +n_of_query: '[' n_of_match_exactly? n_of_number_of_matchers no_of_of_keyword n_of_exprs ']'; +no_of_of_keyword: '-of:' | '-OF:'; +n_of_match_exactly: 'EXACTLY-' | 'exactly-'; n_of_number_of_matchers: NUMBER+; n_of_exprs: expr (',' expr)*; -nucleotide_insertion: 'ins_' position ':' (ambigous_nucleotide_symbol | '?')+; +nucleotide_insertion_query: insertion_keyword position ':' (possible_ambigous_nucleotide_symbol | '?')+; +insertion_keyword: 'ins_' | 'INS_'; -aa_mutation: gene ':' aa_symbol? position ambigous_aa_symbol?; +aa_mutation_query: gene ':' aa_symbol? position possible_ambigous_aa_symbol?; aa_symbol: A | R | N | D | C | E | Q | G | H | I | L | K | M | F | P | S | T | W | Y | V | ASTERISK; -ambigous_aa_symbol: aa_symbol | X | MINUS | DOT; +ambigous_aa_symbol: X | MINUS | DOT; +possible_ambigous_aa_symbol: aa_symbol | ambigous_aa_symbol; gene: covid_gene; covid_gene : E | M | N | S | ORF; -aa_insertion: 'ins_' gene ':' (ambigous_aa_symbol | '?')+; +aa_insertion_query: insertion_keyword gene ':' position ':' (possible_ambigous_aa_symbol | '?')+; nextclade_pangolineage_query: nextclade_pango_lineage_prefix pangolineage_query; -nextclade_pango_lineage_prefix: 'nextcladePangoLineage:'; +nextclade_pango_lineage_prefix: 'nextcladePangoLineage:' | 'NEXTCLADEPANGOLINEAGE:'; nextstrain_clade_lineage_query: nextstrain_clade_prefix nextstrain_clade_query; -nextstrain_clade_prefix: 'nextstrainClade:'; +nextstrain_clade_prefix: 'nextstrainClade:'| 'NEXTSTRAINCLADE:'; nextstrain_clade_query: NUMBER NUMBER nextstrain_clade_character | 'RECOMBINANT'; nextstrain_clade_character: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z; gisaid_clade_lineage_query: gisaid_clade_prefix gisaid_clade_query; -gisaid_clade_prefix: 'gisaid:'; +gisaid_clade_prefix: ('gisaid:'| 'GISAID:'); gisaid_clade_query: gisaid_clade_character gisaid_clade_character?; gisaid_clade_character: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z; diff --git a/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt index b5bd1e6b..ea48ea84 100644 --- a/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt +++ b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt @@ -1,8 +1,8 @@ package org.genspectrum.lapis.model import VariantQueryBaseListener -import VariantQueryParser.Aa_insertionContext -import VariantQueryParser.Aa_mutationContext +import VariantQueryParser.Aa_insertion_queryContext +import VariantQueryParser.Aa_mutation_queryContext import VariantQueryParser.AndContext import VariantQueryParser.Gisaid_clade_lineage_queryContext import VariantQueryParser.MaybeContext @@ -10,8 +10,8 @@ import VariantQueryParser.N_of_queryContext import VariantQueryParser.Nextclade_pangolineage_queryContext import VariantQueryParser.Nextstrain_clade_queryContext import VariantQueryParser.NotContext -import VariantQueryParser.Nucleotide_insertionContext -import VariantQueryParser.Nucleotide_mutationContext +import VariantQueryParser.Nucleotide_insertion_queryContext +import VariantQueryParser.Nucleotide_mutation_queryContext import VariantQueryParser.OrContext import VariantQueryParser.Pangolineage_queryContext import org.antlr.v4.runtime.tree.ParseTreeListener @@ -31,12 +31,12 @@ class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener return expressionStack.first() } - override fun enterNucleotide_mutation(ctx: Nucleotide_mutationContext?) { + override fun enterNucleotide_mutation_query(ctx: Nucleotide_mutation_queryContext?) { if (ctx == null) { return } val position = ctx.position().text.toInt() - val secondSymbol = ctx.ambigous_nucleotide_symbol()?.text ?: "-" + val secondSymbol = ctx.nucleotide_mutation_query_second_symbol()?.text ?: "-" val expr = NucleotideSymbolEquals(position, secondSymbol) expressionStack.addLast(expr) @@ -89,15 +89,15 @@ class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener expressionStack.addLast(NOf(n, matchExactly, children.reversed())) } - override fun enterNucleotide_insertion(ctx: Nucleotide_insertionContext?) { + override fun enterNucleotide_insertion_query(ctx: Nucleotide_insertion_queryContext?) { throw NotImplementedError("Nucleotide insertions are not supported yet.") } - override fun enterAa_mutation(ctx: Aa_mutationContext?) { + override fun enterAa_mutation_query(ctx: Aa_mutation_queryContext?) { throw NotImplementedError("Amino acid mutations are not supported yet.") } - override fun enterAa_insertion(ctx: Aa_insertionContext?) { + override fun enterAa_insertion_query(ctx: Aa_insertion_queryContext?) { throw NotImplementedError("Amino acid insertions are not supported yet.") } diff --git a/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt b/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt index b2bdefb6..8d872cd3 100644 --- a/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt +++ b/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt @@ -242,7 +242,7 @@ class VariantQueryFacadeTest { @Test fun `given a valid variantQuery with a 'AA insertion' expression the map should throw an error`() { - val variantQuery = "ins_S:N501EPE" + val variantQuery = "ins_S:501:EPE" val exception = assertThrows { underTest.map(variantQuery) }