Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing HGVS unchanged nucleic acid sequence. #517

Merged
merged 1 commit into from
Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
The deprecated classes are due to be removed in v0.36.
* Adding modules for importing VCF files into H2 database files, listing the meta data contents, and annotating VCF files.

### jannovar-hgvs

* Fixing parsing of unchanged (`=`) for nucleic acid sequences (#493).

## v0.34

### jannovar-cli
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ aa_change_inner
| aa_change_substitution
| aa_change_ssr
| aa_change_insertion
| aa_change_unchanged
| aa_change_misc
;

Expand All @@ -131,6 +132,16 @@ aa_change_deletion
)?
;


/**
 * amino acid unchanged
 *
 * Matches either a single amino acid point location or an amino acid range,
 * in both cases followed by the AA_EQUAL token.
 */
aa_change_unchanged
:
(
aa_point_location
| aa_range
) AA_EQUAL
;

/** amino acid duplication */
aa_change_duplication
:
Expand Down Expand Up @@ -373,6 +384,7 @@ nt_change_inner
| nt_change_inversion
| nt_change_substitution
| nt_change_ssr
| nt_change_unchanged
| nt_change_misc
;

Expand Down Expand Up @@ -402,6 +414,18 @@ nt_change_duplication
)?
;

/**
 * unchanged nucleotides
 *
 * Matches one of two forms, each followed by the NT_EQUAL token:
 * a nucleotide point location immediately followed by a nucleotide string,
 * or a nucleotide range on its own.
 */
nt_change_unchanged
:
(
(
nt_point_location
nt_string
)
| nt_range
) NT_EQUAL
;

/** nucleotide replacement/indel/delins */
nt_change_indel
:
Expand Down Expand Up @@ -596,4 +620,4 @@ legacy_point_location
NT_MINUS
| NT_PLUS
) nt_number
;
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package de.charite.compbio.jannovar.hgvs.nts.change;

import com.google.common.base.Joiner;
import de.charite.compbio.jannovar.hgvs.nts.NucleotideRange;
import de.charite.compbio.jannovar.hgvs.nts.NucleotideSeqDescription;

/**
 * Nucleotide change describing a stretch of the sequence that is marked as unchanged ("=").
 * <p>
 * Instances hold the affected range together with a description of the unchanged sequence; both
 * are rendered, followed by "=", by {@link #toHGVSString()}.
 *
 * @author <a href="mailto:manuel.holtgrewe@bihealth.de">Manuel Holtgrewe</a>
 */
public class NucleotideUnchanged extends NucleotideChange {

    /**
     * range of nucleotides that is unchanged
     */
    private final NucleotideRange range;
    /**
     * description of the unchanged nucleotide sequence
     */
    private final NucleotideSeqDescription seq;

    /**
     * Build from positions with offsets and an existing sequence description.
     */
    public static NucleotideUnchanged buildWithOffset(boolean onlyPredicted, int firstPos, int firstPosOffset,
                                                      int lastPos, int lastPosOffset, NucleotideSeqDescription seq) {
        final NucleotideRange unchangedRange = NucleotideRange.build(firstPos, firstPosOffset, lastPos, lastPosOffset);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, seq);
    }

    /**
     * Build from positions with offsets and the nucleotide string of the unchanged sequence.
     */
    public static NucleotideUnchanged buildWithOffsetWithSequence(boolean onlyPredicted, int firstPos,
                                                                  int firstPosOffset, int lastPos, int lastPosOffset, String nts) {
        final NucleotideRange unchangedRange = NucleotideRange.build(firstPos, firstPosOffset, lastPos, lastPosOffset);
        final NucleotideSeqDescription description = new NucleotideSeqDescription(nts);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * Build from positions with offsets and the length of the unchanged sequence.
     */
    public static NucleotideUnchanged buildWithOffsetWithLength(boolean onlyPredicted, int firstPos, int firstPosOffset,
                                                                int lastPos, int lastPosOffset, int seqLen) {
        final NucleotideRange unchangedRange = NucleotideRange.build(firstPos, firstPosOffset, lastPos, lastPosOffset);
        final NucleotideSeqDescription description = new NucleotideSeqDescription(seqLen);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * Build from positions with offsets, using a default-constructed sequence description.
     */
    public static NucleotideUnchanged buildWithOffsetWithoutSeqDescription(boolean onlyPredicted, int firstPos,
                                                                           int firstPosOffset, int lastPos, int lastPosOffset) {
        final NucleotideRange unchangedRange = NucleotideRange.build(firstPos, firstPosOffset, lastPos, lastPosOffset);
        final NucleotideSeqDescription description = new NucleotideSeqDescription();
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * Build from positions without offsets and an existing sequence description.
     */
    public static NucleotideUnchanged build(boolean onlyPredicted, int firstPos, int lastPos,
                                            NucleotideSeqDescription seq) {
        final NucleotideRange unchangedRange = NucleotideRange.buildWithoutOffset(firstPos, lastPos);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, seq);
    }

    /**
     * Build from positions without offsets and the nucleotide string of the unchanged sequence.
     */
    public static NucleotideUnchanged buildWithSequence(boolean onlyPredicted, int firstPos, int lastPos, String nts) {
        final NucleotideRange unchangedRange = NucleotideRange.buildWithoutOffset(firstPos, lastPos);
        final NucleotideSeqDescription description = new NucleotideSeqDescription(nts);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * Build from positions without offsets and the length of the unchanged sequence.
     */
    public static NucleotideUnchanged buildWithLength(boolean onlyPredicted, int firstPos, int lastPos, int seqLen) {
        final NucleotideRange unchangedRange = NucleotideRange.buildWithoutOffset(firstPos, lastPos);
        final NucleotideSeqDescription description = new NucleotideSeqDescription(seqLen);
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * Build from positions without offsets, using a default-constructed sequence description.
     */
    public static NucleotideUnchanged buildWithoutSeqDescription(boolean onlyPredicted, int firstPos, int lastPos) {
        final NucleotideRange unchangedRange = NucleotideRange.buildWithoutOffset(firstPos, lastPos);
        final NucleotideSeqDescription description = new NucleotideSeqDescription();
        return new NucleotideUnchanged(onlyPredicted, unchangedRange, description);
    }

    /**
     * @param onlyPredicted flag handed on to the {@link NucleotideChange} constructor
     * @param range         range of nucleotides that is unchanged
     * @param seq           description of the unchanged nucleotide sequence
     */
    public NucleotideUnchanged(boolean onlyPredicted, NucleotideRange range, NucleotideSeqDescription seq) {
        super(onlyPredicted);
        this.range = range;
        this.seq = seq;
    }

    /**
     * @return a new instance with the same range and sequence description but the given flag
     */
    @Override
    public NucleotideUnchanged withOnlyPredicted(boolean flag) {
        return new NucleotideUnchanged(flag, range, seq);
    }

    /**
     * @return range of nucleotides that is unchanged
     */
    public NucleotideRange getRange() {
        return range;
    }

    /**
     * @return description of the unchanged nucleotide sequence
     */
    public NucleotideSeqDescription getSeq() {
        return seq;
    }

    /**
     * @return HGVS strings of range and sequence description followed by "=", passed through
     * {@code wrapIfOnlyPredicted}
     */
    @Override
    public String toHGVSString() {
        final String rangePart = range.toHGVSString();
        final String seqPart = seq.toHGVSString();
        final String joined = Joiner.on("").join(rangePart, seqPart, "=");
        return wrapIfOnlyPredicted(joined);
    }

    @Override
    public String toString() {
        return new StringBuilder("NucleotideUnchanged [range=")
            .append(range)
            .append(", seq=")
            .append(seq)
            .append("]")
            .toString();
    }

    @Override
    public int hashCode() {
        // Same 31-based accumulation as the usual generated form, seeded with the superclass hash.
        int hash = super.hashCode();
        hash = 31 * hash + (range != null ? range.hashCode() : 0);
        hash = 31 * hash + (seq != null ? seq.hashCode() : 0);
        return hash;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!super.equals(obj) || getClass() != obj.getClass()) {
            return false;
        }
        final NucleotideUnchanged that = (NucleotideUnchanged) obj;
        final boolean sameRange = (range == null) ? (that.range == null) : range.equals(that.range);
        if (!sameRange) {
            return false;
        }
        return (seq == null) ? (that.seq == null) : seq.equals(that.seq);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,31 @@ else if (ctx.nt_string() != null)
setValue(ctx, change);
}


/**
 * Leaving of nt_change_unchanged rule
 * <p>
 * Builds a {@link NucleotideUnchanged} from the values attached to the child nodes and attaches
 * the result to ctx.
 */
@Override
public void exitNt_change_unchanged(Nt_change_unchangedContext ctx) {
    LOGGER.debug("Leaving nt_change_unchanged");

    final NucleotideRange unchangedRange;
    if (ctx.nt_range() == null) {
        // No explicit range: the single point location serves as both start and end.
        final NucleotidePointLocation position = (NucleotidePointLocation) getValue(ctx.nt_point_location());
        unchangedRange = new NucleotideRange(position, position);
    } else {
        unchangedRange = (NucleotideRange) getValue(ctx.nt_range());
    }

    // Use the nucleotide string if one was parsed, otherwise a default sequence description.
    final NucleotideSeqDescription seqDescription = (ctx.nt_string() == null)
        ? new NucleotideSeqDescription()
        : new NucleotideSeqDescription(ctx.nt_string().getText());

    setValue(ctx, new NucleotideUnchanged(false, unchangedRange, seqDescription));
}

/**
* Leaving of nt_change_duplication rule
* <p>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package de.charite.compbio.jannovar.hgvs.protein.change;

import de.charite.compbio.jannovar.hgvs.AminoAcidCode;
import de.charite.compbio.jannovar.hgvs.protein.ProteinRange;
import de.charite.compbio.jannovar.hgvs.protein.ProteinSeqDescription;

// TODO(holtgrewe): Remove seqDesc?

/**
 * Unchanged mark ("=") for a range of amino acids inside a protein.
 * <p>
 * The HGVS representation consists of the range followed by "="; the sequence description is
 * stored and exposed through {@link #getSeqDesc()} but is not part of the HGVS string.
 *
 * @author <a href="mailto:manuel.holtgrewe@bihealth.de">Manuel Holtgrewe</a>
 */
public class ProteinUnchanged extends ProteinChange {

    /**
     * range of one or more amino acids that are unchanged
     */
    private final ProteinRange range;
    /**
     * description of the unchanged amino acids; set by every constructor of this class
     */
    private final ProteinSeqDescription seqDesc;

    /**
     * Construct ProteinUnchanged without length and sequence information
     */
    public static ProteinUnchanged buildWithoutSeqDescription(boolean onlyPredicted, String firstAA, int firstPos,
                                                              String lastAA, int lastPos) {
        return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, lastPos));
    }

    /**
     * Construct ProteinUnchanged with length information
     */
    public static ProteinUnchanged buildWithLength(boolean onlyPredicted, String firstAA, int firstPos,
                                                   String lastAA, int lastPos, int length) {
        return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, lastPos), length);
    }

    /**
     * Construct ProteinUnchanged with sequence
     */
    public static ProteinUnchanged buildWithSequence(boolean onlyPredicted, String firstAA, int firstPos, String lastAA,
                                                     int lastPos, String seq) {
        return new ProteinUnchanged(onlyPredicted, ProteinRange.build(firstAA, firstPos, lastAA, lastPos), seq);
    }

    /**
     * Construct ProteinUnchanged without length and sequence information
     */
    public ProteinUnchanged(boolean onlyPredicted, ProteinRange range) {
        super(onlyPredicted);
        this.range = range;
        this.seqDesc = new ProteinSeqDescription();
    }

    /**
     * Construct ProteinUnchanged with length information
     */
    public ProteinUnchanged(boolean onlyPredicted, ProteinRange range, int length) {
        super(onlyPredicted);
        this.range = range;
        this.seqDesc = new ProteinSeqDescription(length);
    }

    /**
     * Construct ProteinUnchanged with sequence information
     */
    public ProteinUnchanged(boolean onlyPredicted, ProteinRange range, String seq) {
        super(onlyPredicted);
        this.range = range;
        this.seqDesc = new ProteinSeqDescription(seq);
    }

    /**
     * Construct ProteinUnchanged from an existing sequence description; used when copying.
     */
    private ProteinUnchanged(boolean onlyPredicted, ProteinRange range, ProteinSeqDescription seqDesc) {
        super(onlyPredicted);
        this.range = range;
        this.seqDesc = seqDesc;
    }

    /**
     * @return unchanged range in the protein
     */
    public ProteinRange getRange() {
        return range;
    }

    /**
     * @return description of the unchanged sequence
     */
    public ProteinSeqDescription getSeqDesc() {
        return seqDesc;
    }

    /**
     * @return HGVS string of the range in the given amino acid code followed by "=", passed
     * through {@code wrapIfOnlyPredicted}
     */
    @Override
    public String toHGVSString(AminoAcidCode code) {
        return wrapIfOnlyPredicted(range.toHGVSString(code) + "=");
    }

    /**
     * @return copy of this change with the same range and sequence description but the given flag
     */
    @Override
    public ProteinChange withOnlyPredicted(boolean onlyPredicted) {
        return new ProteinUnchanged(onlyPredicted, this.range, this.seqDesc);
    }

    @Override
    public String toString() {
        return "ProteinUnchanged [range=" + range + ", seqDesc=" + seqDesc + "]";
    }

    // hashCode/equals follow the same scheme as the nucleotide counterpart (NucleotideUnchanged):
    // start from the superclass result and fold in range and sequence description, null-safely.

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result + ((range == null) ? 0 : range.hashCode());
        result = prime * result + ((seqDesc == null) ? 0 : seqDesc.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (!super.equals(obj))
            return false;
        if (getClass() != obj.getClass())
            return false;
        ProteinUnchanged other = (ProteinUnchanged) obj;
        if (range == null) {
            if (other.range != null)
                return false;
        } else if (!range.equals(other.range))
            return false;
        if (seqDesc == null) {
            if (other.seqDesc != null)
                return false;
        } else if (!seqDesc.equals(other.seqDesc))
            return false;
        return true;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public void setUp() throws Exception {
@Test
public void testParseString() {
String hgvsStrings[] = new String[]{"XXX:r.(?)", "XXX:r.?", "XXX:r.spl?", "XXX:r.(spl?)", "XXX:r.=",
"XXX:r.(=)", "XXX:r.0", "XXX:r.(0)"};
"XXX:r.(=)", "XXX:r.0", "XXX:r.(0)", "XXX:c.97A=", "XXX:c.79_97="};

for (String hgvsString : hgvsStrings) {
HGVSVariant variant = driver.parseHGVSString(hgvsString);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,13 @@ public void testNucleotideSingleVarSubstitution() {
parseString(PREFIX + t + s);
}

/**
 * Parses the unchanged point and range forms under each of the coordinate prefixes listed below.
 */
@Test
public void testNucleotideSingleVarUnchanged() {
    final String[] unchangedSuffixes = {"76A=", "67_76="};
    for (String coordinatePrefix : new String[]{"c.", "m.", "n.", "g.", "r."}) {
        for (String suffix : unchangedSuffixes) {
            parseString(PREFIX + coordinatePrefix + suffix);
        }
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ public void testProteinSingleShortFrameShift() {
parseString(PREFIX + s);
}

/**
 * Parses unchanged protein changes for a single position and for a range.
 */
@Test
public void testProteinUnchanged() {
    final String[] unchangedProteinChanges = {"p.G33=", "p.Arg97_Cys100="};
    for (int i = 0; i < unchangedProteinChanges.length; ++i) {
        parseString(PREFIX + unchangedProteinChanges[i]);
    }
}

@Test
public void testProteinSingleLongFrameShift() {
String[] arr = {"p.Arg97Profs*23", "p.A97Pfs*23"};
Expand Down