From b26f49f84675295f5b53f997b8e4656c40f7d3dd Mon Sep 17 00:00:00 2001 From: Julie Sullivan Date: Wed, 20 Jan 2021 14:53:34 +0000 Subject: [PATCH 1/2] merge in MNV fixes from hotfix 1.5.x branch --- .../tools/variant/VariantNormalizer.java | 13 +- .../tools/variant/VariantNormalizerTest.java | 183 ++++++++++++++++++ pom.xml | 1 - 3 files changed, 193 insertions(+), 4 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index 990d61e8f..2772e9cca 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -978,10 +978,15 @@ public static List decomposeAlignmentSingleVariants(String ref VariantKeyFields keyFields = null; char previousReferenceChar = 0; char previousAlternateChar = 0; + int originalKeyFieldsIndex = 0; // Assume that as a result of the alignment "reference" and "alternate" Strings are of the same length for (int i = 0; i < reference.length(); i++) { char referenceChar = reference.charAt(i); char alternateChar = alternate.charAt(i); + if (referenceChar != '-') { + // keep track where we are in the original reference + originalKeyFieldsIndex++; + } // Insertion if (referenceChar == '-') { // Assume there cannot be a '-' at the reference and alternate aligned sequences at the same position @@ -1003,16 +1008,18 @@ public static List decomposeAlignmentSingleVariants(String ref // Current character is a continuation of a deletion if (previousAlternateChar == '-') { keyFields.setReference(keyFields.getReference() + referenceChar); - keyFields.setEnd(keyFields.getEnd()+1); + keyFields.setEnd(keyFields.getEnd() + 1); // New deletion found, create new keyFields } else { - keyFields = new VariantKeyFields(genomicStart + i, genomicStart + i, + int originalPosition = genomicStart + originalKeyFieldsIndex - 1; + keyFields = new VariantKeyFields(originalPosition, originalPosition, String.valueOf(referenceChar),"", originalKeyFields); keyFieldsList.add(keyFields); } // SNV } else if (referenceChar != alternateChar) { - keyFields = new VariantKeyFields(genomicStart + i, genomicStart + i, + int originalPosition = genomicStart + originalKeyFieldsIndex - 1; + keyFields = new VariantKeyFields(originalPosition, originalPosition, String.valueOf(referenceChar), String.valueOf(alternateChar), originalKeyFields); keyFieldsList.add(keyFields); } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index 952651906..0438b232c 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -123,6 +123,189 @@ public void testNormalizeFalseMNV() throws NonStandardCompliantSampleField { assertEquals(1, normalizedVariantList.get(0).getLength().intValue()); } + @Test + public void testMNVInsertion() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + + // clinvar ID 266834 + // chr13:32316508:GAC:ATCGATCGAT + // chr13:32316508:G:ATCGATCG + // chr13:32316510:C:T + Variant variant = new Variant("13:32316508:GAC:ATCGATCGAT"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32316508), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("ATCGATC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32316510), normalizedVariant.getStart()); + assertEquals("C", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + +// clinvar ID 233410 +// chr2:47800055:TCAA:ATTAAA +// chr2:47800055:T:ATT +// chr2:47800056:C:A + + variant = new Variant("2:47800055:TCAA:ATTAAA"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(47800055), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("AT", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(47800056), normalizedVariant.getStart()); + assertEquals("C", normalizedVariant.getReference()); + assertEquals("A", normalizedVariant.getAlternate()); + + } + + @Test + public void testMVNDoubleDeletions() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + +// clinvar ID 17618 +// chr15:42410982:AG:TCATCT +// chr15:42410981:T:TTC +// chr15:42410982:A:ATC +// chr15:42410983:G:T + + Variant variant = new Variant("15:42410982:AG:TCATCT"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(3, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410982), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("TC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410983), normalizedVariant.getStart()); + assertEquals("G", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410985), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("TC", normalizedVariant.getAlternate()); + } + + // clinvar ID 266834 + @Test + public void testNoVCV() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + +// chr13:32316508:GAC:ATCGATCGAT +// chr13:32316508:G:ATCGATCG +// chr13:32316510:C:T + + Variant variant = new Variant("13:32316508:GAC:ATCGATCGAT"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(3, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410982), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("TC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410983), normalizedVariant.getStart()); + assertEquals("G", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("15", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(42410985), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("TC", normalizedVariant.getAlternate()); + + + } + + + @Test + public void testMVNDeletions() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + +// chr1:5927667:GTT:CCACG +// chr1:5927666:G:CCAC +// chr1:5927667:GTT:G + + Variant variant = new Variant("1:5927667:GTT:CCACG"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("1", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(5927667), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("CCAC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("1", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(5927668), normalizedVariant.getStart()); + assertEquals("TT", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + } + + @Test public void testNormalizeSamplesDataMNV() throws NonStandardCompliantSampleField { normalizer.setDecomposeMNVs(true); diff --git a/pom.xml b/pom.xml index 40a53fd27..cd307bf2e 100644 --- a/pom.xml +++ b/pom.xml @@ -149,7 +149,6 @@ - com.github.samtools htsjdk From c723a3baed7a0bb1fe18415d8a72a3bc18762772 Mon Sep 17 00:00:00 2001 From: Julie Sullivan Date: Fri, 26 Feb 2021 10:20:37 +0000 Subject: [PATCH 2/2] fix merge error --- .../tools/variant/VariantNormalizerTest.java | 301 +++++++++++++++++- 1 file changed, 285 insertions(+), 16 deletions(-) diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index 0438b232c..f3920835f 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -181,6 +181,136 @@ public void testMNVInsertion() { } + @Test + public void testWrongId() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + // clinvar ID 266834 +// chr13:32316508:GAC:ATCGATCGAT +// chr13:32316508:G:ATCGATCG +// chr13:32316510:C:T + + Variant variant = new Variant("13:32316508:GAC:ATCGATCGAT"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32316508), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("ATCGATC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32316510), normalizedVariant.getStart()); + assertEquals("C", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + +// chr13:32339556:AAAAA:GAAAAG +// +//chr13:32339555:G:GG// +//chr13:32339560:A:G + variant = new Variant("13:32339556:AAAAA:GAAAAG"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32339556), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("G", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32339560), normalizedVariant.getStart()); + assertEquals("A", normalizedVariant.getReference()); + assertEquals("G", normalizedVariant.getAlternate()); + + + //125928 chr13:32332369:AACAGTTGT:GATACTTCAG +// chr13:32332369:A:G +// chr13:32332371:C:T +// chr13:32332373:G:C +// chr13:32332375:T:TCA +// chr13:32332376:GT:G + + variant = new Variant("13:32332369:AACAGTTGT:GATACTTCAG"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(5, normalizedVariantList.size()); + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32332369), normalizedVariant.getStart()); + assertEquals("A", normalizedVariant.getReference()); + assertEquals("G", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32332371), normalizedVariant.getStart()); + assertEquals("C", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32332373), normalizedVariant.getStart()); + assertEquals("G", normalizedVariant.getReference()); + assertEquals("C", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(3); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32332376), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("CA", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(4); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32332377), normalizedVariant.getStart()); + assertEquals("T", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + + //clinvarID 584850 + +// chr13:32336521:AAG:A +// chr13:32336524:C:CT + + variant = new Variant("13:32336522:AGC:CT"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32336522), normalizedVariant.getStart()); + assertEquals("AG", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32336525), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + } + + @Test public void testMVNDoubleDeletions() { @@ -237,36 +367,142 @@ public void testNoVCV() { VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); -// chr13:32316508:GAC:ATCGATCGAT -// chr13:32316508:G:ATCGATCG -// chr13:32316510:C:T +// 407332 chr11:47351272:TGG:CCTCC +// chr11:47351272:T:CCT +// chr11:47351273:G:C +// chr11:47351274:G:C - Variant variant = new Variant("13:32316508:GAC:ATCGATCGAT"); + Variant variant = new Variant("11:47351272:TGG:CCTCC"); List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); assertEquals(3, normalizedVariantList.size()); Variant normalizedVariant = normalizedVariantList.get(0); assertEquals(VariantType.INDEL, normalizedVariant.getType()); - assertEquals("15", normalizedVariant.getChromosome()); - assertEquals(Integer.valueOf(42410982), normalizedVariant.getStart()); + assertEquals("11", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(47351272), normalizedVariant.getStart()); assertEquals("", normalizedVariant.getReference()); - assertEquals("TC", normalizedVariant.getAlternate()); + assertEquals("CC", normalizedVariant.getAlternate()); normalizedVariant = normalizedVariantList.get(1); assertEquals(VariantType.SNV, normalizedVariant.getType()); - assertEquals("15", normalizedVariant.getChromosome()); - assertEquals(Integer.valueOf(42410983), normalizedVariant.getStart()); + assertEquals("11", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(47351273), normalizedVariant.getStart()); assertEquals("G", normalizedVariant.getReference()); - assertEquals("T", normalizedVariant.getAlternate()); + assertEquals("C", normalizedVariant.getAlternate()); normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("11", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(47351274), normalizedVariant.getStart()); + assertEquals("G", normalizedVariant.getReference()); + assertEquals("C", normalizedVariant.getAlternate()); + + // 419158 + variant = new Variant("2:108907934:AGCCCTG:CGGGCTCCTCATCA"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(5, normalizedVariantList.size()); + +// chr2:108907934:A:C +// chr2:108907935:G:GGG +// chr2:108907936:C:CT +// chr2:108907939:T:TCATC +// chr2:108907940:G:A + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(108907934), normalizedVariant.getStart()); + assertEquals("A", normalizedVariant.getReference()); + assertEquals("C", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); assertEquals(VariantType.INDEL, normalizedVariant.getType()); - assertEquals("15", normalizedVariant.getChromosome()); - assertEquals(Integer.valueOf(42410985), normalizedVariant.getStart()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(108907936), normalizedVariant.getStart()); assertEquals("", normalizedVariant.getReference()); - assertEquals("TC", normalizedVariant.getAlternate()); + assertEquals("GG", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(108907939), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("T", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(3); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(108907940), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("CATC", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(4); + assertEquals(VariantType.SNV, normalizedVariant.getType()); + assertEquals("2", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(108907940), normalizedVariant.getStart()); + assertEquals("G", normalizedVariant.getReference()); + assertEquals("A", normalizedVariant.getAlternate()); + + + // 141142 + variant = new Variant("17:43063370:ACCCCTAAAGAGATCATAGA:TATT"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(4, normalizedVariantList.size()); + +// chr17:43063369:CACCCC:C +// chr17:43063375:TAAAGAG:T +// chr17:43063383:TCA:T +// chr17:43063386:TAGA:T + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("17", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(43063370), normalizedVariant.getStart()); + assertEquals("ACCCC", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("17", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(43063376), normalizedVariant.getStart()); + assertEquals("AAAGAG", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + normalizedVariant = normalizedVariantList.get(2); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("17", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(43063384), normalizedVariant.getStart()); + assertEquals("CA", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + normalizedVariant = normalizedVariantList.get(3); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("17", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(43063387), normalizedVariant.getStart()); + assertEquals("AGA", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + // ClinVarID 90202 + variant = new Variant("3:37006995:AG:GTT"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(2, normalizedVariantList.size()); + +// chr3:37006994:AA:A +// chr3:37006996:G:GTT + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("3", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(37006995), normalizedVariant.getStart()); + assertEquals("A", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + normalizedVariant = normalizedVariantList.get(1); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("3", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(37006997), normalizedVariant.getStart()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("TT", normalizedVariant.getAlternate()); } @@ -305,6 +541,42 @@ public void testMVNDeletions() { } + @Test + public void testClinical() { + + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig + = (new VariantNormalizer.VariantNormalizerConfig()) + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(true); + + VariantNormalizer variantNormalizer = new VariantNormalizer(variantNormalizerConfig); + + Variant variant = new Variant("3:37089111:TGTTGAGTTTCTGAA:T"); + List normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(1, normalizedVariantList.size()); + + Variant normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("3", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(37089111), normalizedVariant.getStart()); + assertEquals("GTTGAGTTTCTGAA", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + + variant = new Variant("13:32912901:TAAGA:T"); + normalizedVariantList = variantNormalizer.apply(Collections.singletonList(variant)); + assertEquals(1, normalizedVariantList.size()); + + normalizedVariant = normalizedVariantList.get(0); + assertEquals(VariantType.INDEL, normalizedVariant.getType()); + assertEquals("13", normalizedVariant.getChromosome()); + assertEquals(Integer.valueOf(32912901), normalizedVariant.getStart()); + assertEquals("AAGA", normalizedVariant.getReference()); + assertEquals("", normalizedVariant.getAlternate()); + + } + @Test public void testNormalizeSamplesDataMNV() throws NonStandardCompliantSampleField { @@ -336,9 +608,6 @@ public void testNormalizeSamplesDataMNV() throws NonStandardCompliantSampleField assertEquals(106, variants.get(4).getStart().intValue()); assertEquals(108, variants.get(4).getEnd().intValue()); - - - assertEquals(0, snp.getStudies().get(0).getSecondaryAlternates().size()); assertEquals(0, indel.getStudies().get(0).getSecondaryAlternates().size());