diff --git a/CHANGELOG.md b/CHANGELOG.md index dc154d8407..da275fb1ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,10 @@ The deprecated classes are due to be removed in v0.36. * Adding modules for importing VCF files into H2 database files, listing the meta data conents, and annotating VCF files. +### jannovar-hgvs + +* Fixing parsing of unchanged (`=`) for nucleic acid sequences (#493). + ## v0.34 ### jannovar-cli diff --git a/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 b/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 index 9d6b640775..0bee40176a 100644 --- a/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 +++ b/jannovar-hgvs/src/main/antlr4/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParser.g4 @@ -115,6 +115,7 @@ aa_change_inner | aa_change_substitution | aa_change_ssr | aa_change_insertion + | aa_change_unchanged | aa_change_misc ; @@ -131,6 +132,16 @@ aa_change_deletion )? ; + +/** amino acid unchanged */ +aa_change_unchanged +: + ( + aa_point_location + | aa_range + ) AA_EQUAL +; + /** amino acid duplication */ aa_change_duplication : @@ -373,6 +384,7 @@ nt_change_inner | nt_change_inversion | nt_change_substitution | nt_change_ssr + | nt_change_unchanged | nt_change_misc ; @@ -402,6 +414,18 @@ nt_change_duplication )? 
; +/** unchanged nucleotides */ +nt_change_unchanged +: + ( + ( + nt_point_location + nt_string + ) + | nt_range + ) NT_EQUAL +; + /** nucleotide replacement/indel/delins */ nt_change_indel : @@ -596,4 +620,4 @@ legacy_point_location NT_MINUS | NT_PLUS ) nt_number -; \ No newline at end of file +; diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideUnchanged.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideUnchanged.java new file mode 100644 index 0000000000..19edb2fd3a --- /dev/null +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/nts/change/NucleotideUnchanged.java @@ -0,0 +1,127 @@ +package de.charite.compbio.jannovar.hgvs.nts.change; + +import com.google.common.base.Joiner; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange; +import de.charite.compbio.jannovar.hgvs.nts.NucleotideSeqDescription; + +/** + * Marks a nucleotide position or range as unchanged; rendered as range, sequence description, then {@code =}. + * + * @author Manuel Holtgrewe + */ +public class NucleotideUnchanged extends NucleotideChange { + + /** + * unchanged range of nucleotides + */ + private final NucleotideRange range; + /** + * description of the unchanged nucleotide sequence + */ + private final NucleotideSeqDescription seq; + + public static NucleotideUnchanged buildWithOffset(boolean onlyPredicted, int firstPos, int firstPosOffset, + int lastPos, int lastPosOffset, NucleotideSeqDescription seq) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.build(firstPos, firstPosOffset, lastPos, + lastPosOffset), seq); + } + + public static NucleotideUnchanged buildWithOffsetWithSequence(boolean onlyPredicted, int firstPos, + int firstPosOffset, int lastPos, int lastPosOffset, String nts) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.build(firstPos, firstPosOffset, lastPos, + lastPosOffset), new NucleotideSeqDescription(nts)); + } + + public static NucleotideUnchanged buildWithOffsetWithLength(boolean onlyPredicted, int 
firstPos, int firstPosOffset, + int lastPos, int lastPosOffset, int seqLen) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.build(firstPos, firstPosOffset, lastPos, + lastPosOffset), new NucleotideSeqDescription(seqLen)); + } + + public static NucleotideUnchanged buildWithOffsetWithoutSeqDescription(boolean onlyPredicted, int firstPos, + int firstPosOffset, int lastPos, int lastPosOffset) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.build(firstPos, firstPosOffset, lastPos, + lastPosOffset), new NucleotideSeqDescription()); + } + + public static NucleotideUnchanged build(boolean onlyPredicted, int firstPos, int lastPos, + NucleotideSeqDescription seq) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.buildWithoutOffset(firstPos, lastPos), seq); + } + + public static NucleotideUnchanged buildWithSequence(boolean onlyPredicted, int firstPos, int lastPos, String nts) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.buildWithoutOffset(firstPos, lastPos), + new NucleotideSeqDescription(nts)); + } + + public static NucleotideUnchanged buildWithLength(boolean onlyPredicted, int firstPos, int lastPos, int seqLen) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.buildWithoutOffset(firstPos, lastPos), + new NucleotideSeqDescription(seqLen)); + } + + public static NucleotideUnchanged buildWithoutSeqDescription(boolean onlyPredicted, int firstPos, int lastPos) { + return new NucleotideUnchanged(onlyPredicted, NucleotideRange.buildWithoutOffset(firstPos, lastPos), + new NucleotideSeqDescription()); + } + + public NucleotideUnchanged(boolean onlyPredicted, NucleotideRange range, NucleotideSeqDescription seq) { + super(onlyPredicted); + this.range = range; + this.seq = seq; + } + + @Override + public NucleotideUnchanged withOnlyPredicted(boolean flag) { + return new NucleotideUnchanged(flag, range, seq); + } + + public NucleotideRange getRange() { + return range; + } + + public 
NucleotideSeqDescription getSeq() { + return seq; + } + + @Override + public String toHGVSString() { + return wrapIfOnlyPredicted(Joiner.on("").join(range.toHGVSString(), seq.toHGVSString(), "=")); + } + + @Override + public String toString() { + return "NucleotideUnchanged [range=" + range + ", seq=" + seq + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((range == null) ? 0 : range.hashCode()); + result = prime * result + ((seq == null) ? 0 : seq.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + NucleotideUnchanged other = (NucleotideUnchanged) obj; + if (range == null) { + if (other.range != null) + return false; + } else if (!range.equals(other.range)) + return false; + if (seq == null) { + if (other.seq != null) + return false; + } else if (!seq.equals(other.seq)) + return false; + return true; + } + +} diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java index e696c7c3ca..a23babb570 100644 --- a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/parser/Antlr4HGVSParserListenerImpl.java @@ -318,6 +318,31 @@ else if (ctx.nt_string() != null) setValue(ctx, change); } + + /** + * Leaving of nt_change_unchanged rule + *
+ * Construct {@link NucleotideUnchanged} from nt_range (or nt_point_location as both range ends) and + * the optional nt_string text, then label ctx with it. + */ + @Override + public void exitNt_change_unchanged(Nt_change_unchangedContext ctx) { + LOGGER.debug("Leaving nt_change_unchanged"); + final NucleotideRange range; + if (ctx.nt_range() != null) + range = (NucleotideRange) getValue(ctx.nt_range()); + else + range = new NucleotideRange((NucleotidePointLocation) getValue(ctx.nt_point_location()), + (NucleotidePointLocation) getValue(ctx.nt_point_location())); + final NucleotideUnchanged change; + if (ctx.nt_string() != null) + change = new NucleotideUnchanged(false, range, + new NucleotideSeqDescription(ctx.nt_string().getText())); + else + change = new NucleotideUnchanged(false, range, new NucleotideSeqDescription()); + setValue(ctx, change); + } + + /** * Leaving of nt_change_duplication rule *
diff --git a/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/protein/change/ProteinUnchanged.java b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/protein/change/ProteinUnchanged.java new file mode 100644 index 0000000000..1e2733199a --- /dev/null +++ b/jannovar-hgvs/src/main/java/de/charite/compbio/jannovar/hgvs/protein/change/ProteinUnchanged.java @@ -0,0 +1,106 @@ +package de.charite.compbio.jannovar.hgvs.protein.change; + +import de.charite.compbio.jannovar.hgvs.AminoAcidCode; +import de.charite.compbio.jannovar.hgvs.protein.ProteinRange; +import de.charite.compbio.jannovar.hgvs.protein.ProteinSeqDescription; + +// TODO(holtgrewe): Remove seqDesc? NOTE(review): no equals()/hashCode()/toString() overrides here, unlike NucleotideUnchanged. + +/** + * Marks an amino acid position or range of a protein as unchanged; rendered as the range followed by {@code =}. + * + * @author Manuel Holtgrewe + */ +public class ProteinUnchanged extends ProteinChange { + + /** + * range of one or more amino acids that are unchanged + */ + private final ProteinRange range; + /** + * description of the unchanged sequence; currently not part of the HGVS string + */ + private final ProteinSeqDescription seqDesc; + + /** + * Construct ProteinUnchanged without length and sequence information + */ + public static ProteinUnchanged buildWithoutSeqDescription(boolean onlyPredicted, String firstAA, int firstPos, + String lastAA, int lastPos) { + return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, lastPos)); + } + + /** + * Construct ProteinUnchanged with length information + */ + public static ProteinUnchanged buildWithLength(boolean onlyPredicted, String firstAA, int firstPos, + String lastAA, int lastPos, int length) { + return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, lastPos), length); + } + + /** + * Construct ProteinUnchanged with sequence information + */ + public static ProteinUnchanged buildWithSequence(boolean onlyPredicted, String firstAA, int firstPos, String lastAA, + int lastPos, String seq) { + return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, 
lastPos), seq); + } + + /** + * Construct ProteinUnchanged without length and sequence information + */ + public ProteinUnchanged(boolean onlyPredicted, ProteinRange range) { + super(onlyPredicted); + this.range = range; + this.seqDesc = new ProteinSeqDescription(); + } + + /** + * Construct ProteinUnchanged with length information + */ + public ProteinUnchanged(boolean onlyPredicted, ProteinRange range, int length) { + super(onlyPredicted); + this.range = range; + this.seqDesc = new ProteinSeqDescription(length); + } + + /** + * Construct ProteinUnchanged with sequence information + */ + public ProteinUnchanged(boolean onlyPredicted, ProteinRange range, String seq) { + super(onlyPredicted); + this.range = range; + this.seqDesc = new ProteinSeqDescription(seq); + } + + private ProteinUnchanged(boolean onlyPredicted, ProteinRange range, ProteinSeqDescription seqDesc) { + super(onlyPredicted); + this.range = range; + this.seqDesc = seqDesc; + } + + /** + * @return unchanged range in the protein + */ + public ProteinRange getRange() { + return range; + } + + /** + * @return description of the unchanged sequence + */ + public ProteinSeqDescription getSeqDesc() { + return seqDesc; + } + + @Override + public String toHGVSString(AminoAcidCode code) { + return wrapIfOnlyPredicted(range.toHGVSString(code) + "="); + } + + @Override + public ProteinChange withOnlyPredicted(boolean onlyPredicted) { + return new ProteinUnchanged(onlyPredicted, this.range, this.seqDesc); + } + +} diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideMiscChangeTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideMiscChangeTest.java index 404074b2f1..5aa4ad2c76 100644 --- a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideMiscChangeTest.java +++ 
b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/HGVSParserDriverNucleotideMiscChangeTest.java @@ -23,7 +23,7 @@ public void setUp() throws Exception { @Test public void testParseString() { String hgvsStrings[] = new String[]{"XXX:r.(?)", "XXX:r.?", "XXX:r.spl?", "XXX:r.(spl?)", "XXX:r.=", - "XXX:r.(=)", "XXX:r.0", "XXX:r.(0)"}; + "XXX:r.(=)", "XXX:r.0", "XXX:r.(0)", "XXX:c.97A=", "XXX:c.79_97="}; for (String hgvsString : hgvsStrings) { HGVSVariant variant = driver.parseHGVSString(hgvsString); diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/NucleotideParsingTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/NucleotideParsingTest.java index 238fcdfc9f..9b42ea0aa1 100644 --- a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/NucleotideParsingTest.java +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/nts/NucleotideParsingTest.java @@ -21,4 +21,13 @@ public void testNucleotideSingleVarSubstitution() { parseString(PREFIX + t + s); } + @Test + public void testNucleotideSingleVarUnchanged() { /* passes PREFIX + type + change to parseString for every type/change pair */ + String[] types = {"c.", "m.", "n.", "g.", "r."}; + String[] changes = {"76A=", "67_76="}; + for (String t : types) + for (String s : changes) + parseString(PREFIX + t + s); + } + } diff --git a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/protein/ProteinParsingTest.java b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/protein/ProteinParsingTest.java index 24a15ed989..0a6d017111 100644 --- a/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/protein/ProteinParsingTest.java +++ b/jannovar-hgvs/src/test/java/de/charite/compbio/jannovar/hgvs/parser/protein/ProteinParsingTest.java @@ -69,6 +69,13 @@ public void testProteinSingleShortFrameShift() { parseString(PREFIX + s); } + @Test + public void testProteinUnchanged() { /* one single-position and one range example, each followed by '=' */ + String[] arr = {"p.G33=", "p.Arg97_Cys100="}; + for (String s : arr) 
+ parseString(PREFIX + s); + } + @Test public void testProteinSingleLongFrameShift() { String[] arr = {"p.Arg97Profs*23", "p.A97Pfs*23"};