From 2dddc26a306d8123c82cd876ad4d9e64c8bd25d8 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Wed, 9 May 2018 23:18:55 -0400 Subject: [PATCH 1/4] (SV) refactor commit: * SvDiscoveryInputMetaData fields made private and replaced with getters * refactor test utils and data provider for sv discovery subpackage * make changes in StructuralVariationDiscoveryPipelineSparkIntegrationTest to accommodate later integration tests --- ...cturalVariationDiscoveryPipelineSpark.java | 24 +-- ...rVariantsFromContigAlignmentsSAMSpark.java | 18 +- ...romLocalAssemblyContigAlignmentsSpark.java | 36 ++-- .../discovery/SvDiscoveryInputMetaData.java | 84 ++++++-- .../inference/CpxVariantInterpreter.java | 8 +- .../SimpleNovelAdjacencyInterpreter.java | 20 +- .../AnnotatedVariantProducerUnitTest.java | 10 +- ...coveryTestUtilsAndCommonDataProvider.java} | 25 ++- .../SimpleSVDiscoveryTestDataProvider.java | 197 ++++++++---------- .../sv/discovery/SimpleSVTypeUnitTest.java | 38 ++-- .../alignment/AlignedAssemblyUnitTest.java | 10 +- .../AlignedContigGeneratorUnitTest.java | 8 +- .../alignment/AlignmentIntervalUnitTest.java | 14 +- ...yContigAlignmentsConfigPickerUnitTest.java | 42 ++-- ...ContigWithFineTunedAlignmentsUnitTest.java | 6 +- .../ContigAlignmentsModifierUnitTest.java | 4 +- .../BreakpointComplicationsUnitTest.java | 4 +- .../BreakpointsInferenceUnitTest.java | 4 +- .../inference/CpxSVInferenceTestUtils.java | 4 +- ...ariantCanonicalRepresentationUnitTest.java | 4 +- ...VariantInducingAssemblyContigUnitTest.java | 6 +- .../CpxVariantInterpreterUnitTest.java | 65 +++--- ...NovelAdjacencyAndAltHaplotypeUnitTest.java | 38 ++-- .../inference/SimpleChimeraUnitTest.java | 70 +++---- ...cencyAndSimpleChimeraEvidenceUnitTest.java | 5 +- ...DiscoveryPipelineSparkIntegrationTest.java | 36 ++-- .../spark/sv/utils/SVVCFWriterUnitTest.java | 6 +- 27 files changed, 426 insertions(+), 360 deletions(-) rename src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/{SVTestUtils.java 
=> SVDiscoveryTestUtilsAndCommonDataProvider.java} (73%) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index 01071f38a6e..b54cb3efaa2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -182,9 +182,9 @@ protected void runTool( final JavaSparkContext ctx ) { final List annotatedVariants = processEvidenceTargetLinks(assemblyBasedVariants, svDiscoveryInputMetaData); - final String outputPath = svDiscoveryInputMetaData.outputPath; - final SAMSequenceDictionary refSeqDictionary = svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast.getValue(); - final Logger toolLogger = svDiscoveryInputMetaData.toolLogger; + final String outputPath = svDiscoveryInputMetaData.getOutputPath(); + final SAMSequenceDictionary refSeqDictionary = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); SVVCFWriter.writeVCF(annotatedVariants, outputPath + "inv_del_ins.vcf", refSeqDictionary, toolLogger); // TODO: 1/14/18 this is the next version of precise variant calling @@ -232,12 +232,12 @@ private static List processEvidenceTargetLinks(List annotatedVariants; - if (svDiscoveryInputMetaData.sampleSpecificData.evidenceTargetLinks != null) { - final PairedStrandedIntervalTree evidenceTargetLinks = svDiscoveryInputMetaData.sampleSpecificData.evidenceTargetLinks; - final ReadMetadata readMetadata = svDiscoveryInputMetaData.sampleSpecificData.readMetadata; - final ReferenceMultiSource reference = svDiscoveryInputMetaData.referenceData.referenceBroadcast.getValue(); - final 
DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.discoverStageArgs; - final Logger toolLogger = svDiscoveryInputMetaData.toolLogger; + if (svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks() != null) { + final PairedStrandedIntervalTree evidenceTargetLinks = svDiscoveryInputMetaData.getSampleSpecificData().getEvidenceTargetLinks(); + final ReadMetadata readMetadata = svDiscoveryInputMetaData.getSampleSpecificData().getReadMetadata(); + final ReferenceMultiSource reference = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast().getValue(); + final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.getDiscoverStageArgs(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); // annotate with evidence links annotatedVariants = AnnotatedVariantProducer. @@ -265,7 +265,7 @@ private static void experimentalInterpretation(final JavaSparkContext ctx, final JavaRDD assemblyRawAlignments = getContigRawAlignments(ctx, assembledEvidenceResults, svDiscoveryInputMetaData); - final String updatedOutputPath = svDiscoveryInputMetaData.outputPath + "experimentalInterpretation_"; + final String updatedOutputPath = svDiscoveryInputMetaData.getOutputPath() + "experimentalInterpretation_"; svDiscoveryInputMetaData.updateOutputPath(updatedOutputPath); SvDiscoverFromLocalAssemblyContigAlignmentsSpark.AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes @@ -279,8 +279,8 @@ private static JavaRDD getContigRawAlignments(final JavaSparkContext c final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults, final SvDiscoveryInputMetaData svDiscoveryInputMetaData) { final Broadcast referenceSequenceDictionaryBroadcast = - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; - final Broadcast headerBroadcast = 
svDiscoveryInputMetaData.sampleSpecificData.headerBroadcast; + svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); + final Broadcast headerBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast(); final SAMFileHeader headerForReads = headerBroadcast.getValue(); final SAMReadGroupRecord contigAlignmentsReadGroup = new SAMReadGroupRecord(SVUtils.GATKSV_CONTIG_ALIGNMENTS_READ_GROUP_ID); final List refNames = SequenceDictionaryUtils.getContigNamesList(referenceSequenceDictionaryBroadcast.getValue()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java index f520e282dbb..43b3f086e16 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java @@ -147,13 +147,13 @@ protected void runTool(final JavaSparkContext ctx) { final JavaRDD parsedContigAlignments = new SvDiscoverFromLocalAssemblyContigAlignmentsSpark .SAMFormattedContigAlignmentParser(getReads(), - svDiscoveryInputMetaData.sampleSpecificData.headerBroadcast.getValue(), true) + svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast().getValue(), true) .getAlignedContigs(); // assembly-based breakpoints List annotatedVariants = discoverVariantsFromChimeras(svDiscoveryInputMetaData, parsedContigAlignments); - final SAMSequenceDictionary refSeqDictionary = svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast.getValue(); + final SAMSequenceDictionary refSeqDictionary = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(); SVVCFWriter.writeVCF(annotatedVariants, vcfOutputFile, 
refSeqDictionary, localLogger); } @@ -162,7 +162,7 @@ public static List discoverVariantsFromChimeras(final SvDiscover final JavaRDD alignedContigs) { final Broadcast referenceSequenceDictionaryBroadcast = - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; + svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); final JavaPairRDD> contigSeqAndChimeras = alignedContigs @@ -175,12 +175,12 @@ public static List discoverVariantsFromChimeras(final SvDiscover return new Tuple2<>(alignedContig.getContigSequence(), chimeras); }); - final Broadcast referenceBroadcast = svDiscoveryInputMetaData.referenceData.referenceBroadcast; - final List assembledIntervals = svDiscoveryInputMetaData.sampleSpecificData.assembledIntervals; - final Broadcast> cnvCallsBroadcast = svDiscoveryInputMetaData.sampleSpecificData.cnvCallsBroadcast; - final String sampleId = svDiscoveryInputMetaData.sampleSpecificData.sampleId; - final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.discoverStageArgs; - final Logger toolLogger = svDiscoveryInputMetaData.toolLogger; + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); + final List assembledIntervals = svDiscoveryInputMetaData.getSampleSpecificData().getAssembledIntervals(); + final Broadcast> cnvCallsBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getCnvCallsBroadcast(); + final String sampleId = svDiscoveryInputMetaData.getSampleSpecificData().getSampleId(); + final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = svDiscoveryInputMetaData.getDiscoverStageArgs(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); final JavaPairRDD> narlsAndSources = contigSeqAndChimeras diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java 
b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java index beb002742b6..8ea3c1db009 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java @@ -157,9 +157,9 @@ protected void runTool(final JavaSparkContext ctx) { //================================================================================================================== public static final class AssemblyContigsClassifiedByAlignmentSignatures { - final JavaRDD unknown; - final JavaRDD simple; - final JavaRDD complex; + private final JavaRDD unknown; + private final JavaRDD simple; + private final JavaRDD complex; private AssemblyContigsClassifiedByAlignmentSignatures(final JavaRDD contigs) { unknown = contigs.filter(tig -> tig.getAlignmentSignatureBasicType().equals(UNKNOWN)).cache(); @@ -167,6 +167,18 @@ private AssemblyContigsClassifiedByAlignmentSignatures(final JavaRDD tig.getAlignmentSignatureBasicType().equals(COMPLEX)).cache(); } + public JavaRDD getContigsWithSignatureClassifiedAsUnknown() { + return unknown; + } + + public JavaRDD getContigsWithSignatureClassifiedAsSimpleChimera() { + return simple; + } + + public JavaRDD getContigsWithSignatureClassifiedAsComplex() { + return complex; + } + /** * Write SAM file, if requested, for original alignments of contigs recognized as "Ambiguous", "Incomplete", and "MisAssemblySuspect" * TODO: 11/17/17 salvation on assembly contigs that 1) has ambiguous "best" configuration, and 2) has incomplete picture; and flag accordingly @@ -206,9 +218,9 @@ private void writeSAMfilesForUnknown(final String outputPrefix, final JavaRDD assemblyRawAlignments) { - final Broadcast headerBroadcast = svDiscoveryInputMetaData.sampleSpecificData.headerBroadcast; - final Broadcast> 
canonicalChromosomesBroadcast = svDiscoveryInputMetaData.referenceData.canonicalChromosomesBroadcast; - final Logger toolLogger = svDiscoveryInputMetaData.toolLogger; + final Broadcast headerBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast(); + final Broadcast> canonicalChromosomesBroadcast = svDiscoveryInputMetaData.getReferenceData().getCanonicalChromosomesBroadcast(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); final JavaRDD contigsWithChimericAlignmentsReconstructed = AssemblyContigAlignmentsConfigPicker @@ -236,7 +248,7 @@ public static void dispatchJobs(final AssemblyContigsClassifiedByAlignmentSignat final JavaRDD assemblyRawAlignments, final boolean writeSAMFiles) { - final String outputPrefixWithSampleName = svDiscoveryInputMetaData.outputPath; + final String outputPrefixWithSampleName = svDiscoveryInputMetaData.getOutputPath(); // TODO: 1/10/18 bring back read annotation, see ticket 4228 @@ -244,19 +256,19 @@ public static void dispatchJobs(final AssemblyContigsClassifiedByAlignmentSignat SimpleNovelAdjacencyInterpreter.makeInterpretation(contigsByPossibleRawTypes.simple, svDiscoveryInputMetaData); contigsByPossibleRawTypes.simple.unpersist(); SVVCFWriter.writeVCF(simpleVariants, outputPrefixWithSampleName + "NonComplex.vcf", - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast.getValue(), - svDiscoveryInputMetaData.toolLogger); + svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(), + svDiscoveryInputMetaData.getToolLogger()); final List complexVariants = CpxVariantInterpreter.makeInterpretation(contigsByPossibleRawTypes.complex, svDiscoveryInputMetaData); contigsByPossibleRawTypes.complex.unpersist(); SVVCFWriter.writeVCF(complexVariants, outputPrefixWithSampleName + "Complex.vcf", - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast.getValue(), - svDiscoveryInputMetaData.toolLogger); + 
svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(), + svDiscoveryInputMetaData.getToolLogger()); if (writeSAMFiles) { contigsByPossibleRawTypes.writeSAMfilesForUnknown(outputPrefixWithSampleName, assemblyRawAlignments, - svDiscoveryInputMetaData.sampleSpecificData.headerBroadcast.getValue()); + svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast().getValue()); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java index ee754926586..85dae419f98 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoveryInputMetaData.java @@ -22,10 +22,30 @@ public final class SvDiscoveryInputMetaData { + public ReferenceData getReferenceData() { + return referenceData; + } + + public SampleSpecificData getSampleSpecificData() { + return sampleSpecificData; + } + + public DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection getDiscoverStageArgs() { + return discoverStageArgs; + } + + public Logger getToolLogger() { + return toolLogger; + } + + public String getOutputPath() { + return outputPath; + } + public static final class ReferenceData { - public final Broadcast> canonicalChromosomesBroadcast; - public final Broadcast referenceBroadcast; - public final Broadcast referenceSequenceDictionaryBroadcast; + private final Broadcast> canonicalChromosomesBroadcast; + private final Broadcast referenceBroadcast; + private final Broadcast referenceSequenceDictionaryBroadcast; ReferenceData(final Broadcast> canonicalChromosomesBroadcast, final Broadcast referenceBroadcast, @@ -34,16 +54,28 @@ public static final class ReferenceData { this.referenceBroadcast = referenceBroadcast; this.referenceSequenceDictionaryBroadcast = 
referenceSequenceDictionaryBroadcast; } + + public Broadcast> getCanonicalChromosomesBroadcast() { + return canonicalChromosomesBroadcast; + } + + public Broadcast getReferenceBroadcast() { + return referenceBroadcast; + } + + public Broadcast getReferenceSequenceDictionaryBroadcast() { + return referenceSequenceDictionaryBroadcast; + } } public static final class SampleSpecificData { - public final String sampleId; + private final String sampleId; - public final ReadMetadata readMetadata; - public final Broadcast headerBroadcast; - public final Broadcast> cnvCallsBroadcast; - public final PairedStrandedIntervalTree evidenceTargetLinks; - public final List assembledIntervals; + private final ReadMetadata readMetadata; + private final Broadcast headerBroadcast; + private final Broadcast> cnvCallsBroadcast; + private final PairedStrandedIntervalTree evidenceTargetLinks; + private final List assembledIntervals; public SampleSpecificData(final String sampleId, final Broadcast> cnvCallsBroadcast, final List assembledIntervals, @@ -57,17 +89,41 @@ public SampleSpecificData(final String sampleId, final Broadcast getHeaderBroadcast() { + return headerBroadcast; + } + + public Broadcast> getCnvCallsBroadcast() { + return cnvCallsBroadcast; + } + + public PairedStrandedIntervalTree getEvidenceTargetLinks() { + return evidenceTargetLinks; + } + + public List getAssembledIntervals() { + return assembledIntervals; + } } - public final ReferenceData referenceData; + private final ReferenceData referenceData; - public final SampleSpecificData sampleSpecificData; + private final SampleSpecificData sampleSpecificData; - public final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs; + private final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs; - public final Logger toolLogger; + private final Logger toolLogger; - public String outputPath; + private String outputPath; public SvDiscoveryInputMetaData(final 
JavaSparkContext ctx, final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreter.java index e675154258e..186e716c18a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreter.java @@ -42,8 +42,8 @@ public final class CpxVariantInterpreter { public static List makeInterpretation(final JavaRDD assemblyContigs, final SvDiscoveryInputMetaData svDiscoveryInputMetaData) { - final Broadcast referenceBroadcast = svDiscoveryInputMetaData.referenceData.referenceBroadcast; - final Broadcast referenceSequenceDictionaryBroadcast = svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); + final Broadcast referenceSequenceDictionaryBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); // almost every thing happens in this series of maps final JavaPairRDD> interpretationAndAssemblyEvidence = @@ -51,8 +51,8 @@ public static List makeInterpretation(final JavaRDD getOneVariantFromOneContig(tig, referenceSequenceDictionaryBroadcast.getValue())) .groupByKey(); // two contigs could give the same variant - if (svDiscoveryInputMetaData.discoverStageArgs.outputCpxResultsInHumanReadableFormat) { - writeResultsForHumanConsumption(svDiscoveryInputMetaData.outputPath, interpretationAndAssemblyEvidence); + if (svDiscoveryInputMetaData.getDiscoverStageArgs().outputCpxResultsInHumanReadableFormat) { + writeResultsForHumanConsumption(svDiscoveryInputMetaData.getOutputPath(), interpretationAndAssemblyEvidence); } 
return interpretationAndAssemblyEvidence.map(pair -> turnIntoVariantContext(pair, referenceBroadcast)).collect(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java index edf3b1b7b55..0b3d84ec416 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyInterpreter.java @@ -49,11 +49,11 @@ public static List makeInterpretation(final JavaRDD referenceBroadcast = svDiscoveryInputMetaData.referenceData.referenceBroadcast; + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); final Broadcast referenceSequenceDictionaryBroadcast = - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; - final String sampleId = svDiscoveryInputMetaData.sampleSpecificData.sampleId; - final Broadcast> cnvCallsBroadcast = svDiscoveryInputMetaData.sampleSpecificData.cnvCallsBroadcast; + svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); + final String sampleId = svDiscoveryInputMetaData.getSampleSpecificData().getSampleId(); + final Broadcast> cnvCallsBroadcast = svDiscoveryInputMetaData.getSampleSpecificData().getCnvCallsBroadcast(); final List annotatedSimpleVariants = narlAndAltSeqAndEvidenceAndTypes .flatMap(pair -> @@ -73,11 +73,11 @@ public static List makeInterpretation(final JavaRDD narls) { final Broadcast referenceSequenceDictionaryBroadcast = - svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; - final List assembledIntervals = svDiscoveryInputMetaData.sampleSpecificData.assembledIntervals; + svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); 
+ final List assembledIntervals = svDiscoveryInputMetaData.getSampleSpecificData().getAssembledIntervals(); final StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection - discoverStageArgs = svDiscoveryInputMetaData.discoverStageArgs; - final Logger toolLogger = svDiscoveryInputMetaData.toolLogger; + discoverStageArgs = svDiscoveryInputMetaData.getDiscoverStageArgs(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); SvDiscoveryUtils.evaluateIntervalsAndNarls(assembledIntervals, narls, referenceSequenceDictionaryBroadcast.getValue(), discoverStageArgs, toolLogger); } @@ -92,8 +92,8 @@ private static void evaluateNarls(final SvDiscoveryInputMetaData svDiscoveryInpu inferTypeFromSingleContigSimpleChimera(final JavaRDD assemblyContigs, final SvDiscoveryInputMetaData svDiscoveryInputMetaData) { - final Broadcast referenceSequenceDictionaryBroadcast = svDiscoveryInputMetaData.referenceData.referenceSequenceDictionaryBroadcast; - final Broadcast referenceBroadcast = svDiscoveryInputMetaData.referenceData.referenceBroadcast; + final Broadcast referenceSequenceDictionaryBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast(); + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); return assemblyContigs diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java index 52085f9b0d6..1ceb3d70153 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java @@ -67,7 +67,7 @@ public void testEvidenceAnnotation(final TestDataForSimpleSVs testData, final List chimericAlignments = 
Collections.singletonList( new SimpleChimera(testData.firstAlignment, testData.secondAlignment, Collections.emptyList(), testData.evidenceAssemblyContigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, - b37_seqDict)); + SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); final Map attributeMap = AnnotatedVariantProducer.getEvidenceRelatedAnnotations(chimericAlignments); @@ -140,7 +140,7 @@ public void testIntegrative(final TestDataForSimpleSVs testData, final List evidence = Collections.singletonList(new SimpleChimera(testData.firstAlignment, testData.secondAlignment, Collections.emptyList(), testData.evidenceAssemblyContigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, - b37_seqDict)); + SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); final String sampleID = "testSample"; final VariantContext variantContext = @@ -161,8 +161,8 @@ private Object[][] dataForIntegrativeTest() { final List data = new ArrayList<>(20); final JavaSparkContext testSparkContext = SparkContextFactory.getTestSparkContext(); - final Broadcast referenceBroadcast = testSparkContext.broadcast(b37_reference); - final Broadcast refSeqDictBroadcast = testSparkContext.broadcast(b37_seqDict); + final Broadcast referenceBroadcast = testSparkContext.broadcast(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21); + final Broadcast refSeqDictBroadcast = testSparkContext.broadcast(SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); final Broadcast> broadcastCNVCalls = null; @@ -342,7 +342,7 @@ public void testProcessEvidenceTargetLinks(final List etls, final List processedVariantContexts = AnnotatedVariantProducer.annotateBreakpointBasedCallsWithImpreciseEvidenceLinks(inputVariants, - evidenceTree, metadata, b37_reference, params, localLogger); + evidenceTree, metadata, SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, params, localLogger); VariantContextTestUtils.assertEqualVariants(processedVariantContexts, expectedVariants); } diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVTestUtils.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVDiscoveryTestUtilsAndCommonDataProvider.java similarity index 73% rename from src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVTestUtils.java rename to src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVDiscoveryTestUtilsAndCommonDataProvider.java index bfef640a288..06e0dc38979 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SVDiscoveryTestUtilsAndCommonDataProvider.java @@ -2,8 +2,12 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.SAMFlag; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.TextCigarCodec; import htsjdk.samtools.util.SequenceUtil; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; +import org.broadinstitute.hellbender.engine.datasources.ReferenceWindowFunctions; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.ContigAlignmentsModifier; @@ -11,12 +15,23 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.CigarUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; +import java.util.*; -public final class SVTestUtils { +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryUtils.getCanonicalChromosomes; + +public final class SVDiscoveryTestUtilsAndCommonDataProvider { + + // data block 
====================================================================================================== + public static final ReferenceMultiSource b37_reference_20_21 = new ReferenceMultiSource( + GATKBaseTest.b37_reference_20_21, ReferenceWindowFunctions.IDENTITY_FUNCTION); + public static final SAMSequenceDictionary b37_seqDict_20_21 = b37_reference_20_21.getReferenceSequenceDictionary(null); + public static final Set b37_canonicalChromosomes = getCanonicalChromosomes(null, b37_seqDict_20_21); + public static final ReferenceMultiSource b38_reference_chr20_chr21 = new ReferenceMultiSource( + GATKBaseTest.b38_reference_20_21, ReferenceWindowFunctions.IDENTITY_FUNCTION); + public static final SAMSequenceDictionary b38_seqDict_chr20_chr21 = b38_reference_chr20_chr21.getReferenceSequenceDictionary(null); + public static final Set b38_canonicalChromosomes = getCanonicalChromosomes(null, b38_seqDict_chr20_chr21); + + // utils block ===================================================================================================== public static byte[] getReverseComplimentCopy(final byte[] sequence) { final byte[] sequenceCopy = Arrays.copyOf(sequence, sequence.length); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java index 1a8a5d9440b..d7e8a9345c7 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java @@ -1,11 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.discovery; -import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.TextCigarCodec; import htsjdk.samtools.util.SequenceUtil; -import org.broadinstitute.hellbender.GATKBaseTest; -import 
org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; -import org.broadinstitute.hellbender.engine.datasources.ReferenceWindowFunctions; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; @@ -20,7 +16,6 @@ import java.io.IOException; import java.util.*; -import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryUtils.getCanonicalChromosomes; import static org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments.NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME; /** @@ -45,16 +40,6 @@ public static final class TestDataForSimpleSVs { } } - public static final ReferenceMultiSource b37_reference = new ReferenceMultiSource( - GATKBaseTest.b37_reference_20_21, ReferenceWindowFunctions.IDENTITY_FUNCTION); - public static final SAMSequenceDictionary b37_seqDict = b37_reference.getReferenceSequenceDictionary(null); - public static final Set b37_canonicalChromosomes = getCanonicalChromosomes(null, b37_seqDict); - - public static final ReferenceMultiSource b38_reference = new ReferenceMultiSource( - GATKBaseTest.b38_reference_20_21, ReferenceWindowFunctions.IDENTITY_FUNCTION); - public static final SAMSequenceDictionary b38_seqDict = b38_reference.getReferenceSequenceDictionary(null); - public static final Set b38_canonicalChromosomes = getCanonicalChromosomes(null, b38_seqDict); - // the chromosome that the long contig1 is supposed to be mapped to is actually chr19, but to make tests runnable, we could only use "20" or "21" // todo: this should be fixed, but since the exact mapped to chromosome is not important now, we push it to later public static final String chrForLongContig1 = "20"; @@ -266,8 +251,8 @@ public static List> getAllTes private static TestDataForSimpleSVs 
forSimpleInversionWithNovelInsertion_leftFlankingForwardStrandOnly() { // inversion with inserted sequence - final byte[] leftFlank = SVTestUtils.makeDummySequence(146, (byte)'A'); - final byte[] rightFlankRC = SVTestUtils.makeDummySequence(50, (byte)'C'); + final byte[] leftFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(146, (byte)'A'); + final byte[] rightFlankRC = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(50, (byte)'C'); final byte[] contigSeq = new byte[leftFlank.length+1+rightFlankRC.length]; System.arraycopy(leftFlank, 0, contigSeq, 0, leftFlank.length); contigSeq[leftFlank.length] = (byte) 'T'; @@ -277,7 +262,7 @@ public static List> getAllTes final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 69149, 69294), 1, 146, TextCigarCodec.decode("146M51S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 69315, 69364), 148, 197, TextCigarCodec.decode("147S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignedContig alignedContig = new AlignedContig("asm000001:tig00001", contigSeq, Arrays.asList(region1, region2)); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), alignedContig.getContigName(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), alignedContig.getContigSequence(), b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), alignedContig.getContigName(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), alignedContig.getContigSequence(), SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); return new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001"); } @@ -289,7 +274,7 @@ 
public static List> getAllTes final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval(chrForLongContig1, 20152030, 20154634), 3604, contigSequence.length, TextCigarCodec.decode("3603H24M1I611M1I1970M"), true, 60, 36, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignedContig alignedContig = new AlignedContig("asm702700:tig00001", contigSequence, Arrays.asList(region1, region2)); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(DiscoverVariantsFromContigAlignmentsSAMSpark.parseOneContig(alignedContig, b37_seqDict, true, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, true).get(0), contigSequence, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(DiscoverVariantsFromContigAlignmentsSAMSpark.parseOneContig(alignedContig, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21, true, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, true).get(0), contigSequence, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); return new TestDataForSimpleSVs(region1, region2, breakpoints, "asm702700:tig00001"); } @@ -311,14 +296,14 @@ public static List> getAllTes final List result = new ArrayList<>(); - final byte[] leftLeftPlus = SVTestUtils.makeDummySequence(100, (byte)'G'); - final byte[] leftLeftMinus = SVTestUtils.makeDummySequence(100, (byte)'C'); - final byte[] leftRightPlus = SVTestUtils.makeDummySequence(100, (byte)'C'); - final byte[] leftRightMinus = 
SVTestUtils.makeDummySequence(100, (byte)'G'); - final byte[] rightLeftPlus = SVTestUtils.makeDummySequence(100, (byte)'A'); - final byte[] rightLeftMinus = SVTestUtils.makeDummySequence(100, (byte)'T'); - final byte[] rightRightPlus = SVTestUtils.makeDummySequence(100, (byte)'T'); - final byte[] rightRightMinus = SVTestUtils.makeDummySequence(100, (byte)'A'); + final byte[] leftLeftPlus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'G'); + final byte[] leftLeftMinus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'C'); + final byte[] leftRightPlus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'C'); + final byte[] leftRightMinus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'G'); + final byte[] rightLeftPlus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A'); + final byte[] rightLeftMinus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'T'); + final byte[] rightRightPlus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'T'); + final byte[] rightRightMinus = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A'); final byte[] leftHomology = "ACACA".getBytes(); final byte[] rightHomology = "TGTGT".getBytes(); {// left flanking evidence '+'/'-' strand representation @@ -328,7 +313,7 @@ public static List> getAllTes AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 101, 205), 1, 105, TextCigarCodec.decode("105M100S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 501, 605), 101, 205, TextCigarCodec.decode("100S105M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", 
NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); outputStream.reset(); @@ -336,7 +321,7 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new AlignmentInterval(new SimpleInterval("20", 501, 605), 1, 105, TextCigarCodec.decode("105M100S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("20", 101, 205), 101, 205, TextCigarCodec.decode("100S105M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } {// right flanking evidence '+'/'-' strand representation @@ -346,7 +331,7 @@ public static List> getAllTes AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 201, 305), 1, 105, TextCigarCodec.decode("105M100S"), false, 60, 0, 100, 
ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 601, 705), 101, 205, TextCigarCodec.decode("100S105M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); outputStream.reset(); @@ -355,7 +340,7 @@ public static List> getAllTes region1 = new AlignmentInterval(new SimpleInterval("20", 601, 705), 1, 105, TextCigarCodec.decode("105M100S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("20", 201, 305), 101, 205, TextCigarCodec.decode("100S105M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, new ArrayList<>(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new 
TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } return result; @@ -371,14 +356,14 @@ public static List> getAllTes final List result = new ArrayList<>(); // simple deletion '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'G'); outputStream.reset(); outputStream.write(leftRefFlank);outputStream.write(rightRefFlank); byte[] contigSeq = outputStream.toByteArray(); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100040), 1 ,40, TextCigarCodec.decode("40M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100061, 100100), 41 ,80, TextCigarCodec.decode("40S40M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // simple deletion '-' strand representation @@ -389,7 +374,7 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new 
AlignmentInterval(new SimpleInterval("21", 100061, 100100), 1 ,40, TextCigarCodec.decode("40M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100040), 41 ,80, TextCigarCodec.decode("40S40M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); return result; @@ -404,15 +389,15 @@ public static List> getAllTes final List result = new ArrayList<>(); // simple insertion '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(100, (byte)'A'); - final byte[] insertedSeq = SVTestUtils.makeDummySequence(50, (byte)'C'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(100, (byte)'T'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A'); + final byte[] insertedSeq = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(50, (byte)'C'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'T'); outputStream.reset(); outputStream.write(leftRefFlank);outputStream.write(insertedSeq);outputStream.write(rightRefFlank); byte[] contigSeq = outputStream.toByteArray(); AlignmentInterval 
region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100100), 1 ,100, TextCigarCodec.decode("100M100S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100101, 100200), 151 ,250, TextCigarCodec.decode("100S100M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // simple insertion '-' strand representation @@ -424,7 +409,7 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new AlignmentInterval(new SimpleInterval("21", 100101, 100200), 1 ,100, TextCigarCodec.decode("100M100S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100100), 151 ,250, TextCigarCodec.decode("100S100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, 
Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); return result; @@ -445,16 +430,16 @@ public static List> getAllTes {//fudged deletion case // '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(100, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(100, (byte)'G'); - final byte[] substitution = SVTestUtils.makeDummySequence(10, (byte)'C'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'G'); + final byte[] substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(10, (byte)'C'); byte[] contigSeq = new byte[leftRefFlank.length + rightRefFlank.length - 50]; System.arraycopy(leftRefFlank, 0, contigSeq, 0, 70); System.arraycopy(substitution, 0, contigSeq, 70, substitution.length); System.arraycopy(rightRefFlank, 30, contigSeq, 70 + substitution.length, 70); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100070), 1 ,70, TextCigarCodec.decode("70M80S"), true, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100131, 100200), 81 ,150, TextCigarCodec.decode("80S70M"), true, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); - NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + NovelAdjacencyAndAltHaplotype breakpoints = new 
NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // '-' strand representation @@ -466,22 +451,22 @@ public static List> getAllTes System.arraycopy(leftRefFlank, 30, contigSeq, 70 + substitution.length, 70); region1 = new AlignmentInterval(new SimpleInterval("21", 100131, 100200), 1 ,70, TextCigarCodec.decode("70M80S"), false, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100070), 81 ,150, TextCigarCodec.decode("80S70M"), false, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } {//fat insertion case // '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(50, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(50, (byte)'G'); - final byte[] substitution = SVTestUtils.makeDummySequence(60, (byte)'C'); + final byte[] leftRefFlank = 
SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(50, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(50, (byte)'G'); + final byte[] substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(60, (byte)'C'); byte[] contigSeq = new byte[leftRefFlank.length + rightRefFlank.length + 40]; System.arraycopy(leftRefFlank, 0, contigSeq, 0, 40); System.arraycopy(substitution, 0, contigSeq, 40, substitution.length); System.arraycopy(rightRefFlank, 10, contigSeq, 40 + substitution.length, 40); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100040), 1 ,40, TextCigarCodec.decode("40M100S"), true, 60, 0, 60, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100061, 100100), 101 ,140, TextCigarCodec.decode("100S40M"), true, 60, 0, 60, ContigAlignmentsModifier.AlnModType.NONE); - NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // '-' strand representation @@ -493,22 +478,22 @@ public static List> getAllTes System.arraycopy(leftRefFlank, 10, contigSeq, 40 + substitution.length, 40); region1 = new AlignmentInterval(new SimpleInterval("21", 100061, 100100), 1 ,40, TextCigarCodec.decode("40M100S"), false, 60, 0, 60, ContigAlignmentsModifier.AlnModType.NONE); region2 = new 
AlignmentInterval(new SimpleInterval("21", 100001, 100040), 101 ,140, TextCigarCodec.decode("100S40M"), false, 60, 0, 60, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } {//two linked variants case // '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(100, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(100, (byte)'G'); - final byte[] substitution = SVTestUtils.makeDummySequence(55, (byte)'C'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'G'); + final byte[] substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(55, (byte)'C'); byte[] contigSeq = new byte[leftRefFlank.length + rightRefFlank.length - 5]; System.arraycopy(leftRefFlank, 0, contigSeq, 0, 70); System.arraycopy(substitution, 0, contigSeq, 70, substitution.length); System.arraycopy(rightRefFlank, 30, contigSeq, 70 + substitution.length, 70); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100070), 1 ,70, TextCigarCodec.decode("70M125S"), true, 60, 0, 70, 
ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100131, 100200), 126 ,195, TextCigarCodec.decode("125S70M"), true, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); - NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // '-' strand representation @@ -520,7 +505,7 @@ public static List> getAllTes System.arraycopy(leftRefFlank, 30, contigSeq, 70 + substitution.length, 70); region1 = new AlignmentInterval(new SimpleInterval("21", 100131, 100200), 1 ,70, TextCigarCodec.decode("70M125S"), false, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100070), 126 ,195, TextCigarCodec.decode("125S70M"), false, 60, 0, 70, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, 
SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } @@ -537,15 +522,15 @@ public static List> getAllTes final List result = new ArrayList<>(); // simple deletion with homology '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'C'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'T'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'C'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'T'); final byte[] homology = new byte[]{'A', 'T', 'C', 'G'}; outputStream.reset(); outputStream.write(leftRefFlank);outputStream.write(homology);outputStream.write(rightRefFlank); byte[] contigSeq = outputStream.toByteArray(); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100044), 1 ,44, TextCigarCodec.decode("44M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100079, 100122), 41 ,84, TextCigarCodec.decode("40S44M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new 
TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // simple deletion with homology '-' strand representation @@ -557,7 +542,7 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new AlignmentInterval(new SimpleInterval("21", 100079, 100122), 1 ,44, TextCigarCodec.decode("44M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100044), 41 ,84, TextCigarCodec.decode("40S44M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); return result; @@ -573,9 +558,9 @@ public static List> getAllTes final List result = new ArrayList<>(); // simple tandem duplication contraction '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); - final byte[] doubleDup = SVTestUtils.makeDummySequence(20, (byte)'C'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'G'); + final byte[] doubleDup = 
SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(20, (byte)'C'); final byte[] contigSeq = new byte[90]; System.arraycopy(leftRefFlank, 0, contigSeq, 0, 40); System.arraycopy(doubleDup, 0, contigSeq, 40, 10); @@ -583,7 +568,7 @@ public static List> getAllTes AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 41 ,90, TextCigarCodec.decode("40S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // simple tandem duplication contraction '-' strand representation @@ -595,7 +580,7 @@ public static List> getAllTes System.arraycopy(leftRefFlank, 0, contigSeq, 50, 40); region1 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 1 ,50, TextCigarCodec.decode("50M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 41 ,90, TextCigarCodec.decode("40S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new 
SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); return result; @@ -615,16 +600,16 @@ public static List> getAllTes {// insertion case // '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); - final byte[] doubleDup = SVTestUtils.makeDummySequence(20, (byte)'C'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'G'); + final byte[] doubleDup = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(20, (byte)'C'); outputStream.reset(); outputStream.write(leftRefFlank);outputStream.write(doubleDup);outputStream.write(rightRefFlank); byte[] contigSeq = outputStream.toByteArray(); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 51 ,100, TextCigarCodec.decode("50S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, 
Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // '-' strand representation @@ -636,22 +621,22 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 1 ,50, TextCigarCodec.decode("50M50S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 51 ,100, TextCigarCodec.decode("50S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } {// duplication case // '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); - final byte[] rightRefFlank = 
SVTestUtils.makeDummySequence(40, (byte)'G'); - final byte[] doubleDup = SVTestUtils.makeDummySequence(110, (byte)'C'); + final byte[] leftRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(40, (byte)'G'); + final byte[] doubleDup = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(110, (byte)'C'); outputStream.reset(); outputStream.write(leftRefFlank);outputStream.write(doubleDup);outputStream.write(rightRefFlank); byte[] contigSeq = outputStream.toByteArray(); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100095), 1 ,95, TextCigarCodec.decode("95M95S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100041, 100135), 96 ,190, TextCigarCodec.decode("95S95M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // '-' strand representation @@ -663,7 +648,7 @@ public static List> getAllTes contigSeq = outputStream.toByteArray(); region1 = new AlignmentInterval(new SimpleInterval("21", 100041, 100135), 1 ,95, TextCigarCodec.decode("95M95S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new 
AlignmentInterval(new SimpleInterval("21", 100001, 100095), 96 ,190, TextCigarCodec.decode("95S95M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } @@ -692,14 +677,14 @@ public static List> getAllTes final List result = new ArrayList<>(); { - AlignmentInterval region1 = 
SVTestUtils.fromSAMRecordString("asm029081:tig00000\t0\t21\t26847644\t60\t1394M1675S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAA
ATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,+,1704S657M2I706M,60,2;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1389\tXS:i:0", true); - AlignmentInterval region2 = 
SVTestUtils.fromSAMRecordString("asm029081:tig00000\t2048\t21\t26849022\t60\t1704H657M2I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,+,1394M1675S,60,1;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:1345\tXS:i:0", true); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), b37_seqDict); + AlignmentInterval region1 = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm029081:tig00000\t0\t21\t26847644\t60\t1394M1675S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTT
AAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,+,1704S657M2I706M,60,2;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1389\tXS:i:0", true); + AlignmentInterval region2 = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm029081:tig00000\t2048\t21\t26849022\t60\t1704H657M2I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,+,1394M1675S,60,1;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:1345\tXS:i:0", true); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); - region1 = 
SVTestUtils.fromSAMRecordString("asm000001:tig00001\t2064\t21\t26849022\t60\t1704H657M3I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,-,1394M1676S,60,1;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:1344\tXS:i:0", true); - region2 = 
SVTestUtils.fromSAMRecordString("asm000001:tig00001\t16\t21\t26847644\t60\t1394M1676S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCA
AATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,-,1704S657M3I706M,60,3;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1384\tXS:i:0", true); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), b37_seqDict); + region1 = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000001:tig00001\t2064\t21\t26849022\t60\t1704H657M3I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,-,1394M1676S,60,1;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:1344\tXS:i:0", true); + region2 = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000001:tig00001\t16\t21\t26847644\t60\t1394M1676S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTT
TAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,-,1704S657M3I706M,60,3;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1384\tXS:i:0", true); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } @@ -715,7 +700,7 @@ public static List> getAllTes AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 1 ,152, TextCigarCodec.decode("152M147S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 163 ,299, TextCigarCodec.decode("162S137M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final 
NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // simple tandem duplication expansion with novel insertion '-' strand representation @@ -729,7 +714,7 @@ public static List> getAllTes region1 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 1 ,137, TextCigarCodec.decode("137M162S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); region2 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 148 ,299, TextCigarCodec.decode("147S152M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), contigSeq, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); } @@ -767,8 +752,8 @@ public static List> getAllTes AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 312610, 312757), 128 ,275, TextCigarCodec.decode("127S148M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); NovelAdjacencyAndAltHaplotype breakpoints = 
new NovelAdjacencyAndAltHaplotype( - new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionWithPseudoHomology, b37_seqDict); + new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionWithPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] contigSeqForComplexExpansionWithPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionWithPseudoHomology, contigSeqForComplexExpansionWithPseudoHomology.length); @@ -777,8 +762,8 @@ public static List> getAllTes region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312718), 136 ,275, TextCigarCodec.decode("135S140M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionWithPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionWithPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // second test: contraction from 2 units to 1 unit with pseudo-homology @@ -788,8 +773,8 @@ public static List> getAllTes breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - 
contigSeqForComplexContractionWithPseudoHomology, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexContractionWithPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] contigSeqForComplexContractionWithPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexContractionWithPseudoHomology, contigSeqForComplexContractionWithPseudoHomology.length); @@ -798,8 +783,8 @@ public static List> getAllTes region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312718), 40, 179, TextCigarCodec.decode("39S140M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexContractionWithPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexContractionWithPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // third test: contraction from 3 units to 2 units without pseudo-homology @@ -810,8 +795,8 @@ public static List> getAllTes breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexContractionNoPseudoHomology, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + 
contigSeqForComplexContractionNoPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] contigSeqForComplexContractionNoPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexContractionNoPseudoHomology, contigSeqForComplexContractionNoPseudoHomology.length); @@ -820,8 +805,8 @@ public static List> getAllTes region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312801), 40, 262, TextCigarCodec.decode("39S223M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexContractionNoPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexContractionNoPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // fourth test: expansion from 2 units to 3 units without pseudo-homology @@ -831,8 +816,8 @@ public static List> getAllTes breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionNoPseudoHomology, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionNoPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] 
contigSeqForComplexExpansionNoPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionNoPseudoHomology, contigSeqForComplexExpansionNoPseudoHomology.length); @@ -841,8 +826,8 @@ public static List> getAllTes region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312801), 136, 358, TextCigarCodec.decode("135S223M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionNoPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionNoPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); return result; @@ -870,8 +855,8 @@ public static List> getAllTes AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 312610, 312703), 74 ,167, TextCigarCodec.decode("73S94M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype( - new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionWithPseudoHomology, b37_seqDict); + new SimpleChimera(region1, region2, Collections.emptyList(), "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionWithPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] 
contigSeqForComplexExpansionWithPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionWithPseudoHomology, contigSeqForComplexExpansionWithPseudoHomology.length); @@ -880,8 +865,8 @@ public static List> getAllTes region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312664), 82 ,167, TextCigarCodec.decode("81S86M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionWithPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionWithPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); // second test: expansion from 2 units to 3 units without pseudo-homology @@ -891,8 +876,8 @@ public static List> getAllTes breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionNoPseudoHomology, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionNoPseudoHomology, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); final byte[] contigSeqForComplexExpansionNoPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionNoPseudoHomology, contigSeqForComplexExpansionNoPseudoHomology.length); @@ -901,8 +886,8 @@ public static List> getAllTes region2 = new 
AlignmentInterval(new SimpleInterval("20", 312579, 312693), 82, 196, TextCigarCodec.decode("81S115M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); breakpoints = new NovelAdjacencyAndAltHaplotype( new SimpleChimera(region1, region2, Collections.emptyList(), - "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict), - contigSeqForComplexExpansionNoPseudoHomology_reverseStrand, b37_seqDict); + "asm000001:tig00001", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21), + contigSeqForComplexExpansionNoPseudoHomology_reverseStrand, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); return result; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java index 01267d5a16d..3e4a9e2780c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java @@ -39,7 +39,7 @@ public static void testAltAlleleSvLenAndIdProductions_stable(final NovelAdjacenc final int expectedSvLen, final String expectedTypeInfoInIdString) throws IOException { - final List producedAlleles = AnnotatedVariantProducer.produceAlleles(novelAdjacencyReferenceLocations.getLeftJustifiedLeftRefLoc(), b37_reference, simpleType); + final List producedAlleles = AnnotatedVariantProducer.produceAlleles(novelAdjacencyReferenceLocations.getLeftJustifiedLeftRefLoc(), SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, simpleType); Assert.assertEquals(producedAlleles.size(), 2); Assert.assertTrue(producedAlleles.get(0).isReference() && producedAlleles.get(1).isNonReference() && producedAlleles.get(1).isSymbolic()); @@ -136,7 
+136,7 @@ public static void testAltAlleleSvLenAndIdProductions_new(final NovelAdjacencyAn final String expectedSymbolicAltAlleleStringWithoutBracket, final int expectedSvLen, final String expectedTypeInfoInIdString) throws IOException { - final List producedAlleles = AnnotatedVariantProducer.produceAlleles(novelAdjacencyReferenceLocations.getLeftJustifiedLeftRefLoc(), b37_reference, simpleType); + final List producedAlleles = AnnotatedVariantProducer.produceAlleles(novelAdjacencyReferenceLocations.getLeftJustifiedLeftRefLoc(), SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, simpleType); Assert.assertEquals(producedAlleles.size(), 2); Assert.assertTrue(producedAlleles.get(0).isReference() && producedAlleles.get(1).isNonReference() && producedAlleles.get(1).isSymbolic()); @@ -164,84 +164,84 @@ private Object[][] forAltAlleleSvLenAndIdProductions_new() { // no inversion case because new code path doesn't call inversion (instead, BND) // simple deletion - data.add(new Object[]{forSimpleDeletion_plus.biPathBubble, forSimpleDeletion_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleDeletion_plus.biPathBubble, forSimpleDeletion_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -20, SimpleSVType.TYPES.DEL.name()}); // simple insertion - data.add(new Object[]{forSimpleInsertion_minus.biPathBubble, forSimpleInsertion_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleInsertion_minus.biPathBubble, forSimpleInsertion_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 50, SimpleSVType.TYPES.INS.name()}); // long range substitution fudged del - data.add(new 
Object[]{forLongRangeSubstitution_fudgedDel_plus.biPathBubble, forLongRangeSubstitution_fudgedDel_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forLongRangeSubstitution_fudgedDel_plus.biPathBubble, forLongRangeSubstitution_fudgedDel_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -60, SimpleSVType.TYPES.DEL.name()}); // long range substitution fat ins - data.add(new Object[]{forLongRangeSubstitution_fatIns_minus.biPathBubble, forLongRangeSubstitution_fatIns_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forLongRangeSubstitution_fatIns_minus.biPathBubble, forLongRangeSubstitution_fatIns_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 60, SimpleSVType.TYPES.INS.name()}); // long range substitution fat ins - List svTypes = forLongRangeSubstitution_DelAndIns_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict); + List svTypes = forLongRangeSubstitution_DelAndIns_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); data.add(new Object[]{forLongRangeSubstitution_DelAndIns_plus.biPathBubble, svTypes.get(0), SYMB_ALT_ALLELE_DEL, -60, SimpleSVType.TYPES.DEL.name()}); data.add(new Object[]{forLongRangeSubstitution_DelAndIns_plus.biPathBubble, svTypes.get(1), SYMB_ALT_ALLELE_INS, 55, SimpleSVType.TYPES.INS.name()}); // simple deletion with homology - data.add(new Object[]{forDeletionWithHomology_minus.biPathBubble, forDeletionWithHomology_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forDeletionWithHomology_minus.biPathBubble, 
forDeletionWithHomology_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -38, SimpleSVType.TYPES.DEL.name()}); // simple tandem dup contraction from 2 units to 1 unit - data.add(new Object[]{forSimpleTanDupContraction_plus.biPathBubble, forSimpleTanDupContraction_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleTanDupContraction_plus.biPathBubble, forSimpleTanDupContraction_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -10, DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING}); // simple tandem dup expansion from 1 unit to 2 units that will be called as insertion - data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, forSimpleTanDupExpansion_ins_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, forSimpleTanDupExpansion_ins_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 10, SimpleSVType.TYPES.INS.name()}); // simple tandem dup expansion from 1 unit to 2 units that will be called as duplication - data.add(new Object[]{forSimpleTanDupExpansion_dup_minus.biPathBubble, forSimpleTanDupExpansion_dup_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleTanDupExpansion_dup_minus.biPathBubble, forSimpleTanDupExpansion_dup_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DUP, 55, 
DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion that will be called as insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 326, SimpleSVType.TYPES.INS.name()}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion that will be called as duplication - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DUP, 99, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); // tandem dup expansion from 1 unit to 2 units with pseudo-homology - data.add(new Object[]{forComplexTanDup_1to2_pseudoHom_minus.biPathBubble, forComplexTanDup_1to2_pseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_1to2_pseudoHom_minus.biPathBubble, forComplexTanDup_1to2_pseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DUP, 96, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); // tandem dup 
contraction from 2 units to 1 unit with pseudo-homology - data.add(new Object[]{forComplexTanDup_2to1_pseudoHom_plus.biPathBubble, forComplexTanDup_2to1_pseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_2to1_pseudoHom_plus.biPathBubble, forComplexTanDup_2to1_pseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -96, DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING}); // tandem dup contraction from 3 units to 2 units - data.add(new Object[]{forComplexTanDup_3to2_noPseudoHom_minus.biPathBubble, forComplexTanDup_3to2_noPseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_3to2_noPseudoHom_minus.biPathBubble, forComplexTanDup_3to2_noPseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DEL, -96, DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING}); // tandem dup expansion from 2 units to 3 units - data.add(new Object[]{forComplexTanDup_2to3_noPseudoHom_plus.biPathBubble, forComplexTanDup_2to3_noPseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_2to3_noPseudoHom_plus.biPathBubble, forComplexTanDup_2to3_noPseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_DUP, 96, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); // short tandem dup expansion from 1 unit to 2 units with pseudo-homology - data.add(new Object[]{forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble, 
forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble, forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 42, SimpleSVType.TYPES.INS.name()}); // short tandem dup expansion from 2 units to 3 units - data.add(new Object[]{forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble, forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + data.add(new Object[]{forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble, forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21).get(0), SYMB_ALT_ALLELE_INS, 42, SimpleSVType.TYPES.INS.name()}); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java index a145dc0d7a1..beb6e3c7ab5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java @@ -7,7 +7,7 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.TextCigarCodec; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.evidence.AlignedAssemblyOrExcuse; import 
org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.CigarUtils; @@ -48,12 +48,12 @@ private Object[][] createInputsAndExpectedResults_Serialization() { final Cigar[] cigars = Arrays.stream(cigarStrings).map(TextCigarCodec::decode).toArray(Cigar[]::new); // these sequence are technically wrong the for the inversion event, but the test purpose is for serialization so it is irrelevant - final byte[] dummySequenceForContigOne = SVTestUtils.makeDummySequence(seqLen[0], (byte)'A'); + final byte[] dummySequenceForContigOne = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(seqLen[0], (byte)'A'); - final byte[] dummySequenceForContigTwo = SVTestUtils.makeDummySequence(seqLen[0], (byte)'T'); - final byte[] dummySequenceForContigThree = SVTestUtils.makeDummySequence(seqLen[0], (byte)'C'); + final byte[] dummySequenceForContigTwo = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(seqLen[0], (byte)'T'); + final byte[] dummySequenceForContigThree = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(seqLen[0], (byte)'C'); - final byte[] dummySequenceForContigFour = SVTestUtils.makeDummySequence(seqLen[0], (byte)'G'); + final byte[] dummySequenceForContigFour = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(seqLen[0], (byte)'G'); final List allContigs = new ArrayList<>(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedContigGeneratorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedContigGeneratorUnitTest.java index a399b39a69e..9c2a63c23fe 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedContigGeneratorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedContigGeneratorUnitTest.java @@ -7,7 +7,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import 
org.broadinstitute.hellbender.engine.spark.SparkContextFactory; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryPipelineSpark; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoverFromLocalAssemblyContigAlignmentsSpark; import org.broadinstitute.hellbender.tools.spark.sv.evidence.AlignedAssemblyOrExcuse; @@ -119,8 +119,8 @@ public void testConvertAlignedAssemblyOrExcuseToAlignedContigsDirectAndConcordan Assert.assertTrue(StructuralVariationDiscoveryPipelineSpark.InMemoryAlignmentParser.filterAndConvertToAlignedContigDirect(Collections.singletonList(excuse), refNames, null).isEmpty()); // produce test assembly and alignment - final byte[] dummyContigSequence = SVTestUtils.makeDummySequence(1000, (byte)'T'); - final byte[] dummyContigSequenceQuals = SVTestUtils.makeDummySequence(1000, (byte)'A'); + final byte[] dummyContigSequence = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(1000, (byte)'T'); + final byte[] dummyContigSequenceQuals = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(1000, (byte)'A'); final List dummyConnections = Collections.emptyList(); final FermiLiteAssembly.Contig unmappedContig = new FermiLiteAssembly.Contig(dummyContigSequence, dummyContigSequenceQuals, 100); // totally random 100 supporting reads @@ -199,7 +199,7 @@ public void testConvertUnmappedRecords() { private Object[][] forNullOrEmptyAlignments() { final List data = new ArrayList<>(20); - data.add(new Object[]{"dummy", SVTestUtils.makeDummySequence(100, (byte) 'A'), null}); + data.add(new Object[]{"dummy", SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte) 'A'), null}); return data.toArray(new 
Object[data.size()][]); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java index fb7d59aeb3c..16a36b32d66 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java @@ -2,7 +2,7 @@ import htsjdk.samtools.*; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.utils.Strand; import org.broadinstitute.hellbender.utils.RandomDNA; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -206,7 +206,7 @@ public void testConstructionFromSAMRecord(final BwaMemAlignment bwaMemAlignment, final boolean expectedIsPositiveStrand, final int expectedStartOnContig_1BasedInclusive, final int expectedEndOnContig_1BasedInclusive, final int expectedContigLength, final int expectedMapQualInBwaMemAlignment, final AlignmentInterval expectedAlignmentInterval) { - final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVTestUtils.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); + final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); final AlignmentInterval alignmentInterval = new AlignmentInterval(samRecord); Assert.assertEquals(alignmentInterval.referenceSpan, expectedReferenceInterval); 
Assert.assertEquals(alignmentInterval.cigarAlong5to3DirectionOfContig, expectedCigar); @@ -222,7 +222,7 @@ public void testConstructionFromStr(final BwaMemAlignment bwaMemAlignment, final final boolean expectedIsPositiveStrand, final int expectedStartOnContig_1BasedInclusive, final int expectedEndOnContig_1BasedInclusive, final int expectedContigLength, final int expectedMapQualInBwaMemAlignment, final AlignmentInterval expectedAlignmentInterval) { - final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVTestUtils.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); + final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); final StringBuilder strBuilder = new StringBuilder(String.join(",", samRecord.getContig(), "" + samRecord.getStart(), samRecord.getReadNegativeStrandFlag() ? 
"-" : "+", samRecord.getCigarString(), "" + samRecord.getMappingQuality())); if (samRecord.getAttribute(SAMTag.NM.name()) != null || samRecord.getAttribute(SAMTag.AS.name()) != null) { @@ -246,7 +246,7 @@ public void testConstructionFromGATKRead(final BwaMemAlignment bwaMemAlignment, final boolean expectedIsPositiveStrand, final int expectedStartOnContig_1BasedInclusive, final int expectedEndOnContig_1BasedInclusive, final int expectedContigLength, final int expectedMapQualInBwaMemAlignment, final AlignmentInterval expectedAlignmentInterval) { - final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVTestUtils.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); + final SAMRecord samRecord = BwaMemAlignmentUtils.applyAlignment("whatever", SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(expectedContigLength, (byte)'A'), null, null, bwaMemAlignment, refNames, hg19Header, false, false); final GATKRead read = new SAMRecordToGATKReadAdapter(samRecord); final AlignmentInterval alignmentInterval = new AlignmentInterval(read); Assert.assertEquals(alignmentInterval.referenceSpan, expectedReferenceInterval); @@ -410,15 +410,15 @@ public void testComputeReadIntervalAlignedToRefSpan(final AlignmentInterval alig private Object[][] testDataForContainsGapOfEqualOrLargerSize(){ final List data = new ArrayList<>(20); - final AlignmentInterval one = 
SVTestUtils.fromSAMRecordString("asm000001:tig00002\t0\tchr1\t180969\t60\t400S237M58D320M4D76M\t*\t0\t0\tCCCTGCAATGTCCCTAGCTGCCAGCAGGCGGCGTGCCACCACTATACAGTAAGCAAGAGGGCCCTGCAGTGCCCCGGCGCCAGCAGGGGGCGCTGGCCACCACTCTAAGCAAGAGAGCCCTGCAGTTGCCCTAGTCGCCAGCAGGGGGCGCCCTGGCACAGCACCGTGAGCAAGCGGGTCCTGTAGTGCCCGGCTGCAAGCAAGGGGCTGTCGATCCCGGCGTTTCGGATTACTGAGGTTCCACCCGTCTCTGCGCCGCGCCGCCGTGACGTGAGTTTCTGCGCGTGCACGGCGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGCCCTCCCCTCCCCTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCAACCCTAACCCCAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAGAGTACCACCGAAATCTGTGCAGAGGACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGAGGAGGCGTGGCACAGGCGCAGAGACACATGCTAGCGCGCCCAGGGGAGGAGGCGTGGCGCAGGCGCAGAGAGGCGCGCCGTGCTGCCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGGTGGAGGCGTGGCGCAGGCGCAGAGACGCACGCCTACGGGCGGGGTTGGGGGGGGCGTGTGTTACAGGAGCAAAGTCGCACGGCGCCGGGCTGGGGGCGGGGGCGGGGGGGCGCCGTGCACGCGCAGAAACTCACGTCACGGCGGCGCGGCGCAGAGACGGGTGGAACCTCAGT\t*\tSA:Z:chr1,181578,-,716S317M,6,1;chr2,32916352,-,640S99M294S,0,5;\tMD:Z:11T132A92^GCCGGCGCAGGCGCAGAGAGGCGCGCCTCGCCGGCGCAGGCGCAGAGAGGCGCGCCGG320^GGGG5G70\tRG:Z:GATKSVContigAlignments\tNM:i:65\tAS:i:524\tXS:i:426", + final AlignmentInterval one = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000001:tig00002\t0\tchr1\t180969\t60\t400S237M58D320M4D76M\t*\t0\t0\tCCCTGCAATGTCCCTAGCTGCCAGCAGGCGGCGTGCCACCACTATACAGTAAGCAAGAGGGCCCTGCAGTGCCCCGGCGCCAGCAGGGGGCGCTGGCCACCACTCTAAGCAAGAGAGCCCTGCAGTTGCCCTAGTCGCCAGCAGGGGGCGCCCTGGCACAGCACCGTGAGCAAGCGGGTCCTGTAGTGCCCGGCTGCAAGCAAGGGGCTGTCGATCCCGGCGTTTCGGATTACTGAGGTTCCACCCGTCTCTGCGCCGCGCCGCCGTGACGTGAGTTTCTGCGCGTGCACGGCGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGCCCTCCCCTCCCCTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCAACCCTAACCCCAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAGAGTACCACCGAAATCTGTGCAGAGGACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGAGGAGGCGTGGCACAGGCGCAGAGACACATGCTAGCGCGCCCAGGGGAGGAGGCGTGGCGCAGGCGCAGAGAGGCGCGCCGTGCTGCCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGGTGGAGGCGTGGCGCAGGCGCAGAGACGCACGCCTACGGGCGGGGTTGGGGGGGGCGTGTGTTACAGGAGCAAAGTCGCACGGCGCCGGGCTGGGGGCGGGGGCGGGGGGGCGCCGTGCACGCGCAGAAACTCACGTCACGGCGGCGCGGCGCAGAGACGGGTGGAACCTCAGT\t*\tSA:Z:chr1,181578,-,716S317M,6,1;chr2,32916352,-,640S99M294S,0,5;\tMD:Z:11T132A92^GCCGGCGCAGGCGCAGAGAGGCGCGCCTCGCCGGCGCAGGCGCAGAGAGGCGCGCCGG320^GGGG5G70\tRG:Z:GATKSVContigAlignments\tNM:i:65\tAS:i:524\tXS:i:426", true); data.add(new Object[]{one, 50, true}); - final AlignmentInterval two = SVTestUtils.fromSAMRecordString("asm000001:tig00005\t16\tchr2\t113602055\t60\t129M29D136M\t*\t0\t0\tCAAGAGGACCCTGCAATGTCCCTAGCTGCCAGCAGGCGGCGTGCCACCACTATACAGTAAGCAAGAGGGCCCTGCAGTGCCCCGGCGCCAGCAGGGGGCGCTGGCGACCACTGTAAGCAAGAGAGCCCTGCGCCTCTCTGCGCCGGCGCGGCGCGGGGTGCCTTTGCGACGGCGGAGTTGCGTTCTCCTCAGCACAGACCCGGAGAGCACCGCGAGGGCGGACCTGCGTTGTCCTCTGCACAGATTTCAGTGGTACTGCGAAGGC\t*\tMD:Z:129^GCGCCTCTCTGCGCCGGCGCCGGCGCGGC136\tRG:Z:GATKSVContigAlignments\tNM:i:29\tAS:i:2", + final AlignmentInterval two = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000001:tig00005\t16\tchr2\t113602055\t60\t129M29D136M\t*\t0\t0\tCAAGAGGACCCTGCAATGTCCCTAGCTGCCAGCAGGCGGCGTGCCACCACTATACAGTAAGCAAGAGGGCCCTGCAGTGCCCCGGCGCCAGCAGGGGGCGCTGGCGACCACTGTAAGCAAGAGAGCCCTGCGCCTCTCTGCGCCGGCGCGGCGCGGGGTGCCTTTGCGACGGCGGAGTTGCGTTCTCCTCAGCACAGACCCGGAGAGCACCGCGAGGGCGGACCTGCGTTGTCCTCTGCACAGATTTCAGTGGTACTGCGAAGGC\t*\tMD:Z:129^GCGCCTCTCTGCGCCGGCGCCGGCGCGGC136\tRG:Z:GATKSVContigAlignments\tNM:i:29\tAS:i:2", false); data.add(new Object[]{two, 50, false}); - final AlignmentInterval three = SVTestUtils.fromSAMRecordString("asm000004:tig00026\t0\tchr1\t224015348\t40\t104S109M20I190M10D79M15D69M\t*\t0\t0\tAGGAGGGAATGTATTCGAGTGGAATGGAAAGGAATGGAATCAACCATAGTGGAATGGAAGGGAATGGAATGGAATGGTAACTAATAGAATGGAATCAACCCGAGTGGAATGGAATGGAAAGGACTGGAATGGAATGGAATGGAATGGAATGGACTCAGATGGAATGGAATGGAATGGACTCGAAAGGATTGGGATGGAATACAATGGAATGGTCTCGAATGGAATGGAATGGACTCGAATGGAATGGAATGCAATGGAATGGACTCAAATGGAATGGAATGGAACTGACTCGAATGGAATTGAATGGAATGGACCCGAATGGAATGGAATGGAATGGACTGGGCTCAAATTGAATGGAATGGAAAAGAATGGAATGGAATAGAATGGACTGGAATGTAATGAGTTTGGAATGGACTTGAATGCAATGGAATGGAATGGACTCAAATGGAATAGCATGGAATGGAATGGACTCAAATGCATTGGAATGGAATGGACTTGAATGGAATGGAATGGAGTTGAATGGACTCATATGGAATGGAATGGCATTGAATGGACTCGAATGGAATAGAAT\t*\tSA:Z:chr5,49661411,-,418S150M3S,4,9;chrUn_KN707896v1_decoy,5378,-,418S153M,0,3;\tMD:Z:7C7T5C277^AATGGAATGG79^GAATGGAATGGACTC69\tRG:Z:GATKSVContigAlignments\tNM:i:48\tAS:i:339\tXS:i:308", + final AlignmentInterval three = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000004:tig00026\t0\tchr1\t224015348\t40\t104S109M20I190M10D79M15D69M\t*\t0\t0\tAGGAGGGAATGTATTCGAGTGGAATGGAAAGGAATGGAATCAACCATAGTGGAATGGAAGGGAATGGAATGGAATGGTAACTAATAGAATGGAATCAACCCGAGTGGAATGGAATGGAAAGGACTGGAATGGAATGGAATGGAATGGAATGGACTCAGATGGAATGGAATGGAATGGACTCGAAAGGATTGGGATGGAATACAATGGAATGGTCTCGAATGGAATGGAATGGACTCGAATGGAATGGAATGCAATGGAATGGACTCAAATGGAATGGAATGGAACTGACTCGAATGGAATTGAATGGAATGGACCCGAATGGAATGGAATGGAATGGACTGGGCTCAAATTGAATGGAATGGAAAAGAATGGAATGGAATAGAATGGACTGGAATGTAATGAGTTTGGAATGGACTTGAATGCAATGGAATGGAATGGACTCAAATGGAATAGCATGGAATGGAATGGACTCAAATGCATTGGAATGGAATGGACTTGAATGGAATGGAATGGAGTTGAATGGACTCATATGGAATGGAATGGCATTGAATGGACTCGAATGGAATAGAAT\t*\tSA:Z:chr5,49661411,-,418S150M3S,4,9;chrUn_KN707896v1_decoy,5378,-,418S153M,0,3;\tMD:Z:7C7T5C277^AATGGAATGG79^GAATGGAATGGACTC69\tRG:Z:GATKSVContigAlignments\tNM:i:48\tAS:i:339\tXS:i:308", true); data.add(new Object[]{three, 10, true}); data.add(new Object[]{three, 20, true}); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java index 764197c3174..f5fce113c2a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java @@ -5,7 +5,7 @@ import org.apache.commons.collections4.IteratorUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.spark.SparkContextFactory; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import 
org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.CpxSVInferenceTestUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.CigarUtils; @@ -18,9 +18,9 @@ import java.util.stream.Collectors; import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY; -import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils.fromPrimarySAMRecordString; -import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils.makeDummySequence; -import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider.b38_canonicalChromosomes; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider.b38_canonicalChromosomes; import static org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigAlignmentsConfigPicker.GoodAndBadMappings; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -151,11 +151,11 @@ private Object[][] forFilterSecondaryConfigurationsByMappingQualityThreshold() { data.add(new Object[]{Arrays.asList(rep1, rep2), 10, Collections.singletonList(rep1)}); - final AlignedContig alignedContig = 
SVTestUtils.fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr22_KI270736v1_random,101512,+,455S56M94S,0,1;chr10,41903518,+,372S74M159S,48,7;chr20,31162579,+,37S59M509S,0,5;chr20,31188805,+,298S43M264S,0,2;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6014,-,81M15I253M5D189M67S,0,34;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55", + final AlignedContig alignedContig = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr22_KI270736v1_random,101512,+,455S56M94S,0,1;chr10,41903518,+,372S74M159S,48,7;chr20,31162579,+,37S59M509S,0,5;chr20,31188805,+,298S43M264S,0,2;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6014,-,81M15I253M5D189M67S,0,34;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55", true); final List goodAndBadMappings = AssemblyContigAlignmentsConfigPicker.pickBestConfigurations(alignedContig, new HashSet<>(Arrays.asList("chr4", "chr5", "chr10", "chr20", "")), 0.0); - final List goodAfterTieBreak = 
SVTestUtils.fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr10,41903518,+,372S74M159S,48,7;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55", + final List goodAfterTieBreak = SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm031090:tig00000\t16\tchr5\t49659827\t60\t332S112M161S\t*\t0\t0\tCATTCCGTTCCGTTCCATTCCATTCCATTCCATTCTATTCGGGTTAATTCCATTCCATTCCATTCGATTGCAATCGAGTTGATTCCATTCCCTAACATTCCATTCCATTCCATTCCATTCCATTCCATTCCATTCCTTTCCATTCCATTACGGATGATTCCATTCCATTGCATTCCATTCCATTCCATTCCCCTGTACTCGGGTTGATTCCATTCCATTGCATTCCAATCCATGCCCTTCCACTCGTGTTGATTCCATTCTTTCCATTCCATTCAAGTTGAATCCATTCCATTGCAATCCATTCCATTCGATTCCATTCGATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTCCATTCCATTCCATTCCGTTCCATTCCTTTCCATTACATTCGGATTGATTCTATTCAATTCCCTTACACTCCATTACATTCCATTTCATTCCGGTAGTTTTCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTGCATTCCATTCCATTTGGGTAGTTTCCACTCCATTCCATTCCATTTCTCTCCATTCCATTGCACTCGGGTTGATTCCATTCC\t*\tSA:Z:chr10,41903518,+,372S74M159S,48,7;chr4,49639434,+,331S37M237S,60,1;chrUn_KI270519v1,137524,+,101S37M467S,3,1;chrUn_KN707896v1_decoy,6436,-,517S88M,60,3;\tMD:Z:58A7C7G18T12C5\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:87\tXS:i:55", true).getAlignments(); final 
ArrayList copy = new ArrayList<>(alignedContig.getAlignments()); copy.removeAll(goodAfterTieBreak); @@ -227,25 +227,25 @@ private Object[][] forTestSpecialChanelForSingleNonCanonicalMappings() { // note chromosome names is hacked to use test seq dict - final AlignmentInterval altOne = SVTestUtils.fromSAMRecordString("asm000099:tig00029\t2064\tchr1_KI270762v1_alt\t207249\t2\t38H252M229H\t*\t0\t0\tGAGCATCTGACAGCCTGGAACAGCACCCATACGCCCAGATGAGCATCTGACAGCCTGAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCACACACTCACCCAGGCCAGCATCTGATGGCCTGGAACGGCACCCACACCCCCAGGTAAGCATCCGACATCCTGAAACAGCTCCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGAC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:57G11A2T13C20C12G0G19T19G30A7T7C15C0A4T21\tRG:Z:GATKSVContigAlignments\tNM:i:15\tAS:i:177\tXS:i:172", true); - final AlignmentInterval altTwo = SVTestUtils.fromSAMRecordString("asm000099:tig00029\t2064\tchr1_KI270762v1_alt\t207251\t0\t66M453H\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;\tMD:Z:15G3A46\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:56\tXS:i:56", true); - final AlignmentInterval canonicalOne = 
SVTestUtils.fromSAMRecordString("asm000099:tig00029\t16\tchr21\t2655749\t1\t125S394M\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCCATACGCCCAGATGAGCATCTGACAGCCTGAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCACACACTCACCCAGGCCAGCATCTGATGGCCTGGAACGGCACCCACACCCCCAGGTAAGCATCCGACATCCTGAAACAGCTCCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGACATCGTGGAGCAGCACCCACAGCCCAAGGTGAGCATCTGACAACCAGGAGCAGCACCCACACACCCAGGCGAGCATCAGAATGCACGGAGCATCACCCACACCCCCAGGCGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGACAGCGTGGAGCAGCACCCACAGCCCAAGGT\t*\tSA:Z:chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:15G4C3A8T0G12T5G1A9A2A5G1A4G2G1G0T3G1G4G44A22G59C14T3C10G10T2C42T1C8T0G1T9C0A7A1A0G14G3C4A3A11C4\tRG:Z:GATKSVContigAlignments\tNM:i:42\tAS:i:184\tXS:i:177", true); - final AlignmentInterval canonicalTwo = SVTestUtils.fromSAMRecordString("asm000099:tig00029\t2064\tchr21\t2667827\t0\t66M453H\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:10A4G3A46\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:51\tXS:i:51", true); - final AlignmentInterval canonicalThree = SVTestUtils.fromSAMRecordString("asm000099:tig00029\t2064\tchr21\t2761512\t0\t95H50M374H\t*\t0\t0\tAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCAC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:25A3T11A8\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:35\tXS:i:35", true); + final AlignmentInterval altOne = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000099:tig00029\t2064\tchr1_KI270762v1_alt\t207249\t2\t38H252M229H\t*\t0\t0\tGAGCATCTGACAGCCTGGAACAGCACCCATACGCCCAGATGAGCATCTGACAGCCTGAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCACACACTCACCCAGGCCAGCATCTGATGGCCTGGAACGGCACCCACACCCCCAGGTAAGCATCCGACATCCTGAAACAGCTCCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGAC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:57G11A2T13C20C12G0G19T19G30A7T7C15C0A4T21\tRG:Z:GATKSVContigAlignments\tNM:i:15\tAS:i:177\tXS:i:172", true); + final AlignmentInterval altTwo = SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000099:tig00029\t2064\tchr1_KI270762v1_alt\t207251\t0\t66M453H\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;\tMD:Z:15G3A46\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:56\tXS:i:56", true); + final AlignmentInterval canonicalOne = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000099:tig00029\t16\tchr21\t2655749\t1\t125S394M\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCCATACGCCCAGATGAGCATCTGACAGCCTGAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCACACACTCACCCAGGCCAGCATCTGATGGCCTGGAACGGCACCCACACCCCCAGGTAAGCATCCGACATCCTGAAACAGCTCCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGACATCGTGGAGCAGCACCCACAGCCCAAGGTGAGCATCTGACAACCAGGAGCAGCACCCACACACCCAGGCGAGCATCAGAATGCACGGAGCATCACCCACACCCCCAGGCGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCAGGTGAGCATCTGACAGCGTGGAGCAGCACCCACAGCCCAAGGT\t*\tSA:Z:chr1,2667827,-,66M453S,0,3;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:15G4C3A8T0G12T5G1A9A2A5G1A4G2G1G0T3G1G4G44A22G59C14T3C10G10T2C42T1C8T0G1T9C0A7A1A0G14G3C4A3A11C4\tRG:Z:GATKSVContigAlignments\tNM:i:42\tAS:i:184\tXS:i:177", true); + final AlignmentInterval canonicalTwo = SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000099:tig00029\t2064\tchr21\t2667827\t0\t66M453H\t*\t0\t0\tGCATCTGACAGCCTGCAACCGCACCCATACGCCCAGATGAGCATCTGACAGCCTGGAACAGCACCC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2761512,-,95S50M374S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:10A4G3A46\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:51\tXS:i:51", true); + final AlignmentInterval canonicalThree = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000099:tig00029\t2064\tchr21\t2761512\t0\t95H50M374H\t*\t0\t0\tAAACAGCACCCTGCACCCCCAGGTGTGCAACTGACAGCCTGGAACAGCAC\t*\tSA:Z:chr1,2655749,-,125S394M,1,42;chr1,2667827,-,66M453S,0,3;chrUn_JTFH01001111v1_decoy,1,+,22S273M224S,46,12;chr1_KI270762v1_alt,207249,-,38S252M229S,2,15;chr1_KI270762v1_alt,207251,-,66M453S,0,2;\tMD:Z:25A3T11A8\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:35\tXS:i:35", true); data.add(new Object[]{Arrays.asList(canonicalOne, canonicalTwo, canonicalThree, altOne, altTwo), 184, null}); - final AlignmentInterval canonical = SVTestUtils.fromSAMRecordString("asm002362:tig00002\t16\tchr21\t1422222\t60\t75M56I139M\t*\t0\t0\tATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC\t*\tSA:Z:chr2_KI270774v1_alt,105288,-,114M1I27M1I127M,56,13;\tMD:Z:214\tRG:Z:GATKSVContigAlignments\tNM:i:56\tAS:i:142\tXS:i:0\n", true); - final AlignmentInterval nonCanonical = SVTestUtils.fromSAMRecordString("asm002362:tig00002\t2064\tchr2_KI270774v1_alt\t105288\t56\t114M1I27M1I127M\t*\t0\t0\tATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC\t*\tSA:Z:chr2,1422222,-,75M56I139M,60,56;\tMD:Z:94C17G1G6T13T3G1G34A3T9T68T8\tRG:Z:GATKSVContigAlignments\tNM:i:13\tAS:i:179\tXS:i:142", true); + final AlignmentInterval canonical = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm002362:tig00002\t16\tchr21\t1422222\t60\t75M56I139M\t*\t0\t0\tATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC\t*\tSA:Z:chr2_KI270774v1_alt,105288,-,114M1I27M1I127M,56,13;\tMD:Z:214\tRG:Z:GATKSVContigAlignments\tNM:i:56\tAS:i:142\tXS:i:0\n", true); + final AlignmentInterval nonCanonical = SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm002362:tig00002\t2064\tchr2_KI270774v1_alt\t105288\t56\t114M1I27M1I127M\t*\t0\t0\tATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC\t*\tSA:Z:chr2,1422222,-,75M56I139M,60,56;\tMD:Z:94C17G1G6T13T3G1G34A3T9T68T8\tRG:Z:GATKSVContigAlignments\tNM:i:13\tAS:i:179\tXS:i:142", true); data.add(new Object[]{Arrays.asList(canonical, nonCanonical), 142, nonCanonical}); - final AlignmentInterval one = SVTestUtils.fromSAMRecordString("asm008084:tig00048\t16\tchr20\t188506254\t60\t182S253M\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGAGGACAAAGGTGTTGTAGATTACATGATGTCCAAATAGGACGTAATAGCATAAGGTCCTGGAAGACAAAGGTGTTGTAGATTACCTGATGTCCAAATAGGAGGTAATAGAATACGGTCCTGGAAGAGAGGGCTGTAAATTACCCGATGTCAAAATAGGAGGTAATAGAATAAGGTCCTGGAGGAGAGGGCTGTAGATTACCTGATGT\t*\tSA:Z:chr4,188507694,-,229M206S,60,22;chr4_KI270789v1_alt,167965,-,122M60I253M,60,62;\tMD:Z:16T99T42A29T6C19C36\tRG:Z:GATKSVContigAlignments\tNM:i:6\tAS:i:223\tXS:i:157", true); - final AlignmentInterval 
two = SVTestUtils.fromSAMRecordString("asm008084:tig00048\t2064\tchr20\t188507694\t60\t229M206H\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGA\t*\tSA:Z:chr4,188506254,-,182S253M,60,6;chr4_KI270789v1_alt,167965,-,122M60I253M,60,62;\tMD:Z:11C5A4G24C32C13T6A31G11G8G0G0A3G2C0A1T1G12G19G7T3C0A14\tRG:Z:GATKSVContigAlignments\tNM:i:22\tAS:i:119\tXS:i:58", true); - final AlignmentInterval three = SVTestUtils.fromSAMRecordString("asm008084:tig00048\t2064\tchr4_KI270789v1_alt\t167965\t60\t122M60I253M\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGAGGACAAAGGTGTTGTAGATTACATGATGTCCAAATAGGACGTAATAGCATAAGGTCCTGGAAGACAAAGGTGTTGTAGATTACCTGATGTCCAAATAGGAGGTAATAGAATACGGTCCTGGAAGAGAGGGCTGTAAATTACCCGATGTCAAAATAGGAGGTAATAGAATAAGGTCCTGGAGGAGAGGGCTGTAGATTACCTGATGT\t*\tSA:Z:chr4,188506254,-,182S253M,60,6;chr4,188507694,-,229M206S,60,22;\tMD:Z:311T6C56\tRG:Z:GATKSVContigAlignments\tNM:i:62\tAS:i:289\tXS:i:223", true); + final AlignmentInterval one = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm008084:tig00048\t16\tchr20\t188506254\t60\t182S253M\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGAGGACAAAGGTGTTGTAGATTACATGATGTCCAAATAGGACGTAATAGCATAAGGTCCTGGAAGACAAAGGTGTTGTAGATTACCTGATGTCCAAATAGGAGGTAATAGAATACGGTCCTGGAAGAGAGGGCTGTAAATTACCCGATGTCAAAATAGGAGGTAATAGAATAAGGTCCTGGAGGAGAGGGCTGTAGATTACCTGATGT\t*\tSA:Z:chr4,188507694,-,229M206S,60,22;chr4_KI270789v1_alt,167965,-,122M60I253M,60,62;\tMD:Z:16T99T42A29T6C19C36\tRG:Z:GATKSVContigAlignments\tNM:i:6\tAS:i:223\tXS:i:157", true); + final AlignmentInterval two = SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm008084:tig00048\t2064\tchr20\t188507694\t60\t229M206H\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGA\t*\tSA:Z:chr4,188506254,-,182S253M,60,6;chr4_KI270789v1_alt,167965,-,122M60I253M,60,62;\tMD:Z:11C5A4G24C32C13T6A31G11G8G0G0A3G2C0A1T1G12G19G7T3C0A14\tRG:Z:GATKSVContigAlignments\tNM:i:22\tAS:i:119\tXS:i:58", true); + final AlignmentInterval three = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm008084:tig00048\t2064\tchr4_KI270789v1_alt\t167965\t60\t122M60I253M\t*\t0\t0\tGTTGTAGATTAACTGATGTCCAAATAGGAGGAAATAGCATAAGATCATGGAAGAGAGAGGGCAGTAGATTACCTGATGTCGAAATAGCAGGTAAGAGCATACTGTCCTGGAAGAGAGGGCTGTAGATTACCTGATGTCCAAATAGTAGGTAATATCGTAATGTGCTGGAAGAGAGAGGGCTTTAGATTACCTGATGTCCAAATAGGAGGAAATAGCATAAGGTCCTGGAGGACAAAGGTGTTGTAGATTACATGATGTCCAAATAGGACGTAATAGCATAAGGTCCTGGAAGACAAAGGTGTTGTAGATTACCTGATGTCCAAATAGGAGGTAATAGAATACGGTCCTGGAAGAGAGGGCTGTAAATTACCCGATGTCAAAATAGGAGGTAATAGAATAAGGTCCTGGAGGAGAGGGCTGTAGATTACCTGATGT\t*\tSA:Z:chr4,188506254,-,182S253M,60,6;chr4,188507694,-,229M206S,60,22;\tMD:Z:311T6C56\tRG:Z:GATKSVContigAlignments\tNM:i:62\tAS:i:289\tXS:i:223", true); data.add(new Object[]{Arrays.asList(one, two, three), 223, three}); - final AlignmentInterval normalOne = SVTestUtils.fromSAMRecordString("asm000012:tig00003\t2048\tchr1\t933803\t60\t317M302H\t*\t0\t0\tTCCTGGAAGGTTTAGAGCCCAGCCTGGGAGTCTTTGGTGCTGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCCCAGGTCACTGCGCAGGACTGCTCCGTTACAGGTGGGCAGGGGAGGCTGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGTGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGC\t*\tSA:Z:chr1,934806,+,163S456M,60,8;\tMD:Z:140T45G3G27G39G30A12C14\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:282\tXS:i:0", true); - final AlignmentInterval normalTwo = 
SVTestUtils.fromSAMRecordString("asm000012:tig00003\t0\tchr1\t934806\t60\t163S456M\t*\t0\t0\tTCCTGGAAGGTTTAGAGCCCAGCCTGGGAGTCTTTGGTGCTGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCCCAGGTCACTGCGCAGGACTGCTCCGTTACAGGTGGGCAGGGGAGGCTGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGTGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGCGGTGCTGCAGGAGGACTGCTCAGGGAGTGGCGCCTGGACCCTGAGCCCCTTCTCTGCTGACTGGGGAGAGGCTCACGGAACCGGGAAGGGGTGGAGGGCCGTGCTCCACACAGTTCGTCTCATTGCTCTCTGGGACTCTGTGGATGTGGGATTGGGCTGAATTAGCAAGAAGAGGAGAAATGAGGGAAGAAAAGAGTTAAATGCATGTTGATTCCAAGCCCCCGCCTGCCGGGGGGACAGCGGGAGGTTGGAGCACGCAGCCCTGGTGCCTGGTGCGAGCTGCACGTGTCTGCCGGTG\t*\tSA:Z:chr1,933803,+,317M302S,60,7;\tMD:Z:14G8G18G12G14G24G2G32G324\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:416\tXS:i:147", true); + final AlignmentInterval normalOne = SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000012:tig00003\t2048\tchr1\t933803\t60\t317M302H\t*\t0\t0\tTCCTGGAAGGTTTAGAGCCCAGCCTGGGAGTCTTTGGTGCTGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCCCAGGTCACTGCGCAGGACTGCTCCGTTACAGGTGGGCAGGGGAGGCTGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGTGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGC\t*\tSA:Z:chr1,934806,+,163S456M,60,8;\tMD:Z:140T45G3G27G39G30A12C14\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:282\tXS:i:0", true); + final AlignmentInterval normalTwo = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000012:tig00003\t0\tchr1\t934806\t60\t163S456M\t*\t0\t0\tTCCTGGAAGGTTTAGAGCCCAGCCTGGGAGTCTTTGGTGCTGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCCCAGGTCACTGCGCAGGACTGCTCCGTTACAGGTGGGCAGGGGAGGCTGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGTGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGCGGTGCTGCAGGAGGACTGCTCAGGGAGTGGCGCCTGGACCCTGAGCCCCTTCTCTGCTGACTGGGGAGAGGCTCACGGAACCGGGAAGGGGTGGAGGGCCGTGCTCCACACAGTTCGTCTCATTGCTCTCTGGGACTCTGTGGATGTGGGATTGGGCTGAATTAGCAAGAAGAGGAGAAATGAGGGAAGAAAAGAGTTAAATGCATGTTGATTCCAAGCCCCCGCCTGCCGGGGGGACAGCGGGAGGTTGGAGCACGCAGCCCTGGTGCCTGGTGCGAGCTGCACGTGTCTGCCGGTG\t*\tSA:Z:chr1,933803,+,317M302S,60,7;\tMD:Z:14G8G18G12G14G24G2G32G324\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:416\tXS:i:147", true); data.add(new Object[]{Arrays.asList(normalOne, normalTwo), 416, null}); return data.toArray(new Object[data.size()][]); } @@ -261,7 +261,7 @@ public void testSpecialChanelForSingleNonCanonicalMappings(final List data = new ArrayList<>(20); // case for two equally-good configurations, one has fewer alignments String sam = 
"asm001160:tig00000\t16\tchr1\t93876139\t60\t516S1317M\t*\t0\t0\tCATGTTGCCCAAGCCAGTCTTGAACTCCAGGGCTCAAAATGCTGAAATTACAGGCACGAGTCACTTACTGCTCTTAACAATCACGTACAAAAATCTTAACATATGATTTTTTTTTTTTTTTTTTGAGACAACATCTCCCTCCATTGCCCAGGCTGGAGTGCAGCGGCACAATCATGGCTCACCGCAGCCTCAATGTCCAGGGCTCAAGCAATCCTCCCACCTCAGCTTCCCAAGTAGCTGGGACCACAGGCGCACAGGGCACGGCTAATTTAAAAAAAATTTTTTGTGTAGAGATAGGGTCTCCTTATATTGCCCAGGCTGATCTCAAACACCTACTTGGGCTCAAGTGATCCTCCTGCCTCAGCCTCACAAAGTGCTGGGATTACAGGCATGAGTCACTGCATCCAACAGATTGATTTCTAATATGTCACCAAAAGGAGCACCTTTAGCTATGATTGGTGGGAAAAATATGACTAAAATAGGTATCCAAAAAGACAAGGGAAATGCTGGATAGAAGAGCCATTCCATGAAGAACCCAAGGCAGTGATTTTCTCATTCCCCAGGCTAACATTTCATATTTTTATGGTAAATTAACCACTTGAAATACATGTATCAAAAACTTATAAAAATAAAGGAAAAACTTACAGTTTAGCCTTTGTGCTATTTAGGAAGTCTTCTTCATCACTAAACTCATCTTCATTTTCGTCATGGTCTGATGAATCTTCTTCACTTTTTTCATCCTCTTCCTCTTCTTTTTCTTCCTCAATGGCAACCTCACTTGCCTTGTCTTCCTCTTCCAAGTAAAAATTTTTATCAGCACTCATTCCAGGAGTTGTGTCAATTACAAACAATGCATTGTCACATGACAGACTTCCTGTGTCTCCACTTAATGACTCCCTTTGGCCACTATTTTCAACAAAACATAAAGTATCCTCTTCATTCTCACTGTTTTCAGACTGTTGGCTTTCATCACTGCTGAGAACTAGTAAGACAGAATTATCTTTACCCTGAGATGTGTTGGGCGCAGACGTGTATAGTTTGGTATCACATTCAAAATCTACATTCCCTTCACTGTTCATGTCTTCACTGACACTTATAACTGTGGACTCTTCTTCATCATCACTACCACCACAATCACCAAACTTTGTCAAGTCACTTGCTTTTATGGGGCTCTTTTTGTTGTTATTCCATCTGCCTACTTCCACAGTTGCAAATGTTTGAGTTAATGATTTCATTACAGCCTCAGAGTTCAGATTAGAGTGCACTGATACAGCATTTTTATTTTGGGGGGTTGAATGTCTCTGAGAAACTAACTGTTGAAGGCTAGTGTCCTGAAGTTCAGAAAGATTCTTCAGCTGAGAACTTTTCTCATTAATTTCTTTCCCCTCATCTGTTATTCCATTGGCATCTTCATCCAAATCCTTACAATTCTGTTTTGTTTCTTTAAGAGATTCAACATTGGCCTGTTCGTGCACTGTTAATATATTTTCTGAACTTCTGTGGGAGAAATCATCATCAAAGTCATTATTATAGAAATTTGGCTTATTTATCTCAGAAAGAGATCTTGCTTGTAAATGGGAAGTTTGTCTGGTATCTGAATCCTCTGAATTCACAGGTGTACCCACGATCTGTTTCTCATTTCCTGGTACAATCTTACTATCTTTCTTTTCAGTTTGTGCCTTTAATTTCCTCTGCATACTCCTGGTTCTTCTAGTTGCAATTCCAGAGAATGAAATGTCTGAGCTTGATGTCTCAGCATCAGATATAGCTTCTGTATGAGATTCTTGGCTTGGATCTGTCAGAGATTTAGCCTTACTTCTTCTGGCTCCTGTA\t*\tSA:Z:chr1,61662787,-,165H74M1594H,12,2,64;chr15,43085606,+,1427H65M341H,0,2,55;chr10,96642377,+,1
660H67M106H,60,5,42;chrUn_JTFH01001621v1_decoy,641,-,106M2I408M1317H,60,5,481;\tMD:Z:1317\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1317\tXS:i:0"; - AlignedContig alignedContig = SVTestUtils.fromPrimarySAMRecordString(sam, true); + AlignedContig alignedContig = SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString(sam, true); data.add(new Object[]{alignedContig, Arrays.asList( @@ -303,7 +303,7 @@ private Object[][] forConfigurationSorting() { // case for two equally-good configurations, having same num of alignments, one has lower total NM sam = "asm000168:tig00027\t0\tchr1\t4939534\t60\t54S139M96S\t*\t0\t0\tGTCCTCCGTATGACGTCAGTGTCCTCCATATGACATCAATATCCTCCATATGACATCAATATCCTCCATATGATGTCAGTGTGCTCCATATGACATCAATATCCTCCATATGATGTCAATATCCTGCGTATGATGTCAATATCCTCCGTATGATGTCAATATCCTCCATATGATGTCAATATCCTCTGTATGATGTCAGTGTCCTCCATATGATGTCAATCGCCTCCATATGATGCCAATATCCTCCGTATGATGTCAATGCCCTCCGTATGATGTCAATGTCCTCCGT\t*\tSA:Z:chr1,4939535,+,155H134M,60,15,61;chr1,4939436,+,66M223H,23,3,51;chrUn_JTFH01000538v1_decoy,1338,-,153M136H,60,1,148;\tMD:Z:14A13C10T0G5G24C52G1G12\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:99\tXS:i:45"; - alignedContig = SVTestUtils.fromPrimarySAMRecordString(sam, true); + alignedContig = SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString(sam, true); data.add(new Object[]{alignedContig, Arrays.asList( diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigWithFineTunedAlignmentsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigWithFineTunedAlignmentsUnitTest.java index 82d3f473073..c3dd29fc74c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigWithFineTunedAlignmentsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigWithFineTunedAlignmentsUnitTest.java @@ -5,7 +5,7 @@ import com.esotericsoftware.kryo.io.Output; 
import htsjdk.samtools.SAMRecord; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; @@ -113,9 +113,9 @@ private static List createSAMRecordsWithEssentialInfo(final List canonicalChromosomes, final SAMSequenceDictionary refSeqDict) { final AlignedContig alignedContig = - SVTestUtils.fromPrimarySAMRecordString(primarySAMRecord, true); + SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString(primarySAMRecord, true); final AssemblyContigWithFineTunedAlignments intermediate = AssemblyContigAlignmentsConfigPicker.reConstructContigFromPickedConfiguration( diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantCanonicalRepresentationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantCanonicalRepresentationUnitTest.java index 0e8a481e4e0..a58855af2b7 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantCanonicalRepresentationUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantCanonicalRepresentationUnitTest.java @@ -8,7 +8,7 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFConstants; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType; import 
org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; @@ -57,7 +57,7 @@ public void testSerialization(final CpxVariantCanonicalRepresentation cpxVariant @Test public void testSpecialCtor() { - final AlignedContig alignedContig = SVTestUtils.fromPrimarySAMRecordString("asm000308:tig00000\t0\tchr1\t14491391\t60\t1276M530S\t*\t0\t0\tTTGCTGCACCCATCAATCCGTCGTCTACATTAGGTATTTCTCCTAATGCTATCCCTCCCCTAGTCCCCTACCCGCCGACAGGTCCCGGTGTGTGATATTCCCCTCCCTGTGTCCATGTTACTCTTTTGATATTACCAGGGACACCTGGATTTCTACTGATTTTAATGAGAATACCTTCTGTATTCACCATTAAATATGATGGTAGTTGCTGGTTTTAGCTCGATATTATTTGTCATGCTGATTAAGTGGACTTGCATTCCTAGCTTTCAAAGAGGTTTTCTTTCCTTTTTAATAAGGAGTGGGTGTTGCATGTTATCTAATACATTGTCAGTGTGTGGCTATTTATAAGTTCTGTGTGATTATACCATTTATCTATATAAATGTCCCCTTTGTTCTATTTAATAATGCTTTTCCCCCTTCTGAATTCCACTTTGAATTTGAATTCTACTTTGTCTGAAATAGATCCTGCCACCCCTGCTTTTAAAAAGAAAAAAATCTTTTTGCTTGTATTATTTAACTTTTTTGCCTATCCCTCCCTTTTTAATCTTTTCATACCATTGCTTTTCAGTGTCTCGAGCAGTAAGACATTTAACAATTATCAGCCCCATGCTTACTTTGTGCCAGACACTGGATTAAACAAAAATGGAAAAAGAGGATAGAATGTGCTGGAAGGGGTACATTCAAACCCAGTCTGAACTGGCCACTGCTGTGAGCAGGTTTGGGGACAGCAGTAGATCCTAGAAGGGCCTGACCAGCTGGGGAAACTGGCCAGGCTGTCCAGAGGTGACAAGAGGATTGTCACCCAGACTTGCCCAAGAAGAGTGAATCTGAGTCTTGGAGAGAACAGGAGTTTGGGTTCTTCTGGGCCCAGATGGCCTCAGGGCTCCCTGGAATTTGGGGACCCCACAGTTGGTCGCCACCATGAATTGAGGAGCCTTGCTTCTCTCCACACTGTCTTTTCCCTGCCTCCTCGTGGCTTCTGCTTCACTCATTCACTCATTCTGTCAGTGAATGATTCTTCAGCACCTGCCCTGCATAGGATGCCATTGTAGGTGCTGGGAAATCAACGGGAAGAAGATGGAAAACGAGACTTCCCTTATGAAGCTTCTGTTCTACAGAGGTGGGCAGACATGGCCAGAAAAAGCACAAGGCCATTTCCAATGGTGGAAGGGCCAGGACTGCTGCCCTTTCTGATAGCTTCTCTTTACACTTAGGAGAAAATTCAGGGCCCCATAATCCCTAGGCCCTACATAACCACACATGCACACACCACAAACCACACACACACACCACACACACCACACACCACACGCTACACACACCATGCACATACCACAAACCACACACACACCACACACACACCACACATTACACACACCACACAGACACCACACACCACACAAACATCACACACCACAGACACATCACACACCACACACACACCACACATACACAGCACACAGCTCACGCATACACAGCACACACATCACACATACACATACCACATACACACCACACACACACCACAAACCACATATACACAGCACACAC
ATCACACAAACACATACCACATACACACCACACACCGTACATACACAGCATACACATTACACATACACACACGACACACCACACACAGACCACACACCACACAGATACAGCACAGAGACACTACACATACCACATACACACTACACACCACACACACACCACACATACACAGCGCACATACACACACCACACACACCACATACAAACCACATACCACATACACACCACACATATACCACACAGACACCATACATA\t*\tSA:Z:chr1,14492666,+,1684S71M51S,60,0,71;chr4,8687087,-,422S46M1338S,60,1,41;\tMD:Z:144T1131\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1271\tXS:i:70", true); + final AlignedContig alignedContig = SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm000308:tig00000\t0\tchr1\t14491391\t60\t1276M530S\t*\t0\t0\tTTGCTGCACCCATCAATCCGTCGTCTACATTAGGTATTTCTCCTAATGCTATCCCTCCCCTAGTCCCCTACCCGCCGACAGGTCCCGGTGTGTGATATTCCCCTCCCTGTGTCCATGTTACTCTTTTGATATTACCAGGGACACCTGGATTTCTACTGATTTTAATGAGAATACCTTCTGTATTCACCATTAAATATGATGGTAGTTGCTGGTTTTAGCTCGATATTATTTGTCATGCTGATTAAGTGGACTTGCATTCCTAGCTTTCAAAGAGGTTTTCTTTCCTTTTTAATAAGGAGTGGGTGTTGCATGTTATCTAATACATTGTCAGTGTGTGGCTATTTATAAGTTCTGTGTGATTATACCATTTATCTATATAAATGTCCCCTTTGTTCTATTTAATAATGCTTTTCCCCCTTCTGAATTCCACTTTGAATTTGAATTCTACTTTGTCTGAAATAGATCCTGCCACCCCTGCTTTTAAAAAGAAAAAAATCTTTTTGCTTGTATTATTTAACTTTTTTGCCTATCCCTCCCTTTTTAATCTTTTCATACCATTGCTTTTCAGTGTCTCGAGCAGTAAGACATTTAACAATTATCAGCCCCATGCTTACTTTGTGCCAGACACTGGATTAAACAAAAATGGAAAAAGAGGATAGAATGTGCTGGAAGGGGTACATTCAAACCCAGTCTGAACTGGCCACTGCTGTGAGCAGGTTTGGGGACAGCAGTAGATCCTAGAAGGGCCTGACCAGCTGGGGAAACTGGCCAGGCTGTCCAGAGGTGACAAGAGGATTGTCACCCAGACTTGCCCAAGAAGAGTGAATCTGAGTCTTGGAGAGAACAGGAGTTTGGGTTCTTCTGGGCCCAGATGGCCTCAGGGCTCCCTGGAATTTGGGGACCCCACAGTTGGTCGCCACCATGAATTGAGGAGCCTTGCTTCTCTCCACACTGTCTTTTCCCTGCCTCCTCGTGGCTTCTGCTTCACTCATTCACTCATTCTGTCAGTGAATGATTCTTCAGCACCTGCCCTGCATAGGATGCCATTGTAGGTGCTGGGAAATCAACGGGAAGAAGATGGAAAACGAGACTTCCCTTATGAAGCTTCTGTTCTACAGAGGTGGGCAGACATGGCCAGAAAAAGCACAAGGCCATTTCCAATGGTGGAAGGGCCAGGACTGCTGCCCTTTCTGATAGCTTCTCTTTACACTTAGGAGAAAATTCAGGGCCCCATAATCCCTAGGCCCTACATAACCACACATGCACACACCACAAACCACACACACACACCACACACACCACACACCACACGCTACACACACCATGCACATACCACAAACCACACACACACCACACACACACCACACATTACACACACCACACAGACACCACACACCACACAAACATCAC
ACACCACAGACACATCACACACCACACACACACCACACATACACAGCACACAGCTCACGCATACACAGCACACACATCACACATACACATACCACATACACACCACACACACACCACAAACCACATATACACAGCACACACATCACACAAACACATACCACATACACACCACACACCGTACATACACAGCATACACATTACACATACACACACGACACACCACACACAGACCACACACCACACAGATACAGCACAGAGACACTACACATACCACATACACACTACACACCACACACACACCACACATACACAGCGCACATACACACACCACACACACCACATACAAACCACATACCACATACACACCACACATATACCACACAGACACCATACATA\t*\tSA:Z:chr1,14492666,+,1684S71M51S,60,0,71;chr4,8687087,-,422S46M1338S,60,1,41;\tMD:Z:144T1131\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1271\tXS:i:70", true); final AssemblyContigWithFineTunedAlignments preprocessedTig = new AssemblyContigWithFineTunedAlignments(alignedContig); final CpxVariantInducingAssemblyContig analysisReadyContig = new CpxVariantInducingAssemblyContig(preprocessedTig, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict); final SimpleInterval manuallyCalculatedAffectedRefRegion = new SimpleInterval("chr1", 14492666, 14492666); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInducingAssemblyContigUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInducingAssemblyContigUnitTest.java index f6888687d9b..6af9180c728 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInducingAssemblyContigUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInducingAssemblyContigUnitTest.java @@ -6,7 +6,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; import 
org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -58,8 +58,8 @@ public void testBasicInfoCtor(final AlignedContig contig, private Object[][] forJumpCtor() { final List data = new ArrayList<>(20); - data.add(new Object[]{SVTestUtils.fromSAMRecordString("asm025517:tig00006\t2048\tchr17\t82596440\t60\t109M142H\t*\t0\t0\tAGCCTCAGAGGTGGCGTCAGGACGTGCCTGCCCCACCGGTTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGTGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGC\t*\tSA:Z:chr17,82596480,+,106S145M,60,0;\tMD:Z:72C36\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:104\tXS:i:19", true), - SVTestUtils.fromSAMRecordString("asm025517:tig00006\t0\tchr17\t82596480\t60\t106S145M\t*\t0\t0\tAGCCTCAGAGGTGGCGTCAGGACGTGCCTGCCCCACCGGTTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGTGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGCGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGCGGGCAGTCTGTGTGGCAGGGCTCCCTGCCCAGCTCCTGCAGCCTCATCAAGTCTCCCACTAAGGAGGTGTCGCTCC\t*\tSA:Z:chr17,82596440,+,109M142S,60,1;\tMD:Z:145\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:145\tXS:i:20", true), + data.add(new Object[]{SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm025517:tig00006\t2048\tchr17\t82596440\t60\t109M142H\t*\t0\t0\tAGCCTCAGAGGTGGCGTCAGGACGTGCCTGCCCCACCGGTTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGTGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGC\t*\tSA:Z:chr17,82596480,+,106S145M,60,0;\tMD:Z:72C36\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:104\tXS:i:19", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm025517:tig00006\t0\tchr17\t82596480\t60\t106S145M\t*\t0\t0\tAGCCTCAGAGGTGGCGTCAGGACGTGCCTGCCCCACCGGTTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGTGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGCCCTGGTGCCCTCGTCACGCCCCGGGACCGCGCACACGTGGGGACTGTTTCCAGACGCACTTTCTGCGGGCAGTCTGTGTGGCAGGGCTCCCTGCCCAGCTCCTGCAGCCTCATCAAGTCTCCCACTAAGGAGGTGTCGCTCC\t*\tSA:Z:chr17,82596440,+,109M142S,60,1;\tMD:Z:145\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:145\tXS:i:20", true), null, 
IllegalArgumentException.class }); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java index 4b581979e7b..8024f9fd77f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java @@ -8,8 +8,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; import org.broadinstitute.hellbender.engine.spark.SparkContextFactory; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.*; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -59,72 +58,72 @@ private static List validInputsToOverlapYieldingStrategy() { new AlignmentInterval(new SimpleInterval("chr1", 202317371, 202317402), 1104, 1135, TextCigarCodec.decode("1085H18S32M1393H"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); // from a 3-alignment real event, here we have the first two alignments, though the second alignment should yield overlap to first when considering the whole contig, here we are reusing them for case that higher chr should yield to lower chr - data.add(new 
ValidLocalData(SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), - SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true), + data.add(new 
ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true), 18, true, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr3", 15737541, 15738626), 1, 1085, TextCigarCodec.decode("726M1D359M18S1425H"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, 
ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL), new AlignmentInterval(new SimpleInterval("chr1", 202317371, 202317420), 1086, 1135, TextCigarCodec.decode("1085H50M1393H"), false, 60, 1, 45, ContigAlignmentsModifier.AlnModType.NONE)) )); // DEL '+'-rep - data.add(new ValidLocalData(SVTestUtils.fromSAMRecordString("asm016186:tig00003\t2048\tchr10\t6055203\t60\t221M1064H\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTT\t*\tSA:Z:chr10,6055906,+,205S1080M,60,0;\tMD:Z:221\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:221\tXS:i:0", true), - SVTestUtils.fromSAMRecordString("asm016186:tig00003\t0\tchr10\t6055906\t60\t205S1080M\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTTAAAGCATCCTGGATATACACTAATGTACAGCTGGCGTTTTGAACCATTACTCTCCACACATTTCCCAGTGGGAACTGTGTTTTCTTTTTGACAGAAGCAAACCACGGAGTCGCCTTGAGATCAGTCAAAACACTGAGTAATCTTAGAGTGGAGAATACCTAAAATTTCTAGAGACAATTATATTATTGTTTTCCCCTAAATGAGTGAGTTACTTGAGAATATGGTGGGGTATTCAGTGGGGGCTATTGGCAAACACTGTTGGCTGACTTCAGAATGGGAGCCATTCAGCAGCAGTTTCATCTCCCGTGTTAAACTGTGAGCACAAGAGGATAAGAGCTGGAACTTTCTTCATCTCTGTAATCCTCATGCATGAATGGATGGATGAAGGAATATCACTGAGAATTCATGTAAAGATGTGCAGATGTGCAGGGAATATTGAACTGAAATTTAAGAGGGCTGGATTCAAGTTCAGCTTCTATGTGAACTCTTTCAGTGAAGAAGGTCTTATAACTTTTAGAAGTAATCAACTGAATTAAGATTCTCTTTTCAGCTGGGCACAGTGGCTCATGCCTGTAATCCCAGCACTTTGGAAGGCCCATGCAGATGGATGGCTTGAGGCTCGGAGTTCAAGACCAGCCTACGCAACATAGCAAAAACCCCCTCTCTACTAAAATTACAAAAAAGGAGCCAGGCATGGTGGTGCACACCTGTAATCCCAGCTACTCAGGAGACAGAGGCAGGAGAATCACTTGAACCCAGGAGCTGAAGGTTGCAGTGAGTTGAGATTGCACCACTGTGCTCTAGCCTGGGCGACAGAGCGAAACTGTCCAAAAAAAAAAAAGATTCCCCTTTCTCTTCTCTCAGGGAATGGCCTGGGATAGAGAAGAGAAACAGGTTCCCACTCATGTAAAACTTCTCCCATGGGAGACAAGGACATTGTTTGAGGG
GAAAATTCCTACACAAGCAAACAAACAGCAGAGGACCCCGCCCTCCCTGTCCAGGGCAGGAGCACAGTGGACCACCTGCTGCCCCTGTGTCTTGGGGCCTCTCTCCCTGGAATGTCA\t*\tSA:Z:chr10,6055203,+,221M1064S,60,0;\tMD:Z:1080\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1080\tXS:i:39", true), + data.add(new ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm016186:tig00003\t2048\tchr10\t6055203\t60\t221M1064H\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTT\t*\tSA:Z:chr10,6055906,+,205S1080M,60,0;\tMD:Z:221\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:221\tXS:i:0", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm016186:tig00003\t0\tchr10\t6055906\t60\t205S1080M\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTTAAAGCATCCTGGATATACACTAATGTACAGCTGGCGTTTTGAACCATTACTCTCCACACATTTCCCAGTGGGAACTGTGTTTTCTTTTTGACAGAAGCAAACCACGGAGTCGCCTTGAGATCAGTCAAAACACTGAGTAATCTTAGAGTGGAGAATACCTAAAATTTCTAGAGACAATTATATTATTGTTTTCCCCTAAATGAGTGAGTTACTTGAGAATATGGTGGGGTATTCAGTGGGGGCTATTGGCAAACACTGTTGGCTGACTTCAGAATGGGAGCCATTCAGCAGCAGTTTCATCTCCCGTGTTAAACTGTGAGCACAAGAGGATAAGAGCTGGAACTTTCTTCATCTCTGTAATCCTCATGCATGAATGGATGGATGAAGGAATATCACTGAGAATTCATGTAAAGATGTGCAGATGTGCAGGGAATATTGAACTGAAATTTAAGAGGGCTGGATTCAAGTTCAGCTTCTATGTGAACTCTTTCAGTGAAGAAGGTCTTATAACTTTTAGAAGTAATCAACTGAATTAAGATTCTCTTTTCAGCTGGGCACAGTGGCTCATGCCTGTAATCCCAGCACTTTGGAAGGCCCATGCAGATGGATGGCTTGAGGCTCGGAGTTCAAGACCAGCCTACGCAACATAGCAAAAACCCCCTCTCTACTAAAATTACAAAAAAGGAGCCAGGCATGGTGGTGCACACCTGTAATCCCAGCTACTCAGGAGACAGAGGCAGGAGAATCACTTGAACCCAGGAGCTGAAGGTTGCAGTGAGTTGAGATTGCACCACTGTGCTCTAGCCTGGGCGACAGAGCGAAACTGTCCAAAAAAAAAAAAGATTCCCCTTTCTCTTCTCTCAGGGAATGGCCTGGGATAGAGAAGAGAAACAGGTTCCCACTCATGTAAAACTTCTCC
CATGGGAGACAAGGACATTGTTTGAGGGGAAAATTCCTACACAAGCAAACAAACAGCAGAGGACCCCGCCCTCCCTGTCCAGGGCAGGAGCACAGTGGACCACCTGCTGCCCCTGTGTCTTGGGGCCTCTCTCCCTGGAATGTCA\t*\tSA:Z:chr10,6055203,+,221M1064S,60,0;\tMD:Z:1080\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1080\tXS:i:39", true), 16, true, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr10", 6055203, 6055407), 1, 205, TextCigarCodec.decode("205M16S1064H"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL), new AlignmentInterval(new SimpleInterval("chr10", 6055906, 6056985), 206, 1285, TextCigarCodec.decode("205S1080M"), true, 60, 0, 1080, ContigAlignmentsModifier.AlnModType.NONE)) )); // DEL '-'-rep - data.add(new ValidLocalData(SVTestUtils.fromSAMRecordString("asm011675:tig00000\t2064\tchr7\t5803446\t60\t1046H558M\t*\t0\t0\tATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5802154,-,1009M1D120M475S,60,4;\tMD:Z:8G325C223\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:548\tXS:i:0", true), - 
SVTestUtils.fromSAMRecordString("asm011675:tig00000\t16\tchr7\t5802154\t60\t1009M1D120M475S\t*\t0\t0\tATATTTTCTGAGATTTATAAGCTTTTTTAAAAAATTAAGGGCTGGGCACAGTGGTTCACACTTGTAAGTGCAGCACTTTGGGAGGCTAAGGCAGGAGGATTGATTGAGGTCAGGAGTTGGAGGCTACAGTGAGCTATGATTGCACCACTGCACTCTAGCCTGGGAGACAGAGTGAGACCCTGACTCAAACAGTAATTAAATCAATAAAATTTAGAAGTTAAGATTCTTCAGCCTCTTTTGGGCTGGGCATGGTCGCTCAAGCTTGTAATCCCAGCACTCTGGGAGGCTGTGGCAGGCTGATCACTTGAGGCCAGGAGTTTGAGACCAACCTGGCCAACATTGTGAAACCCCATCTCTACTAAAAATACAGAAATTAGCCGGTGTGGTGATGCATGCCTGTAATCCCAGCTGTTCAGGAGGCTGAGACAGGAGAATTGCTTGAACTCGGGAGGTGGAGGTTGCAGTGAGCTGAGAGCATGTCACTCTATTCCAGCCCGGGCGACAGAACTAGACTCCGTCTCAAAAAAAAAAAAAAAAAGATTCTTCAGTCTCTTTTGATCTTCCTGTGCCCACTTTATGGTGCCCGGAGCTGCTGATGTTCAGATTTGCCATGGGCGGTGCTCCCCTACATCTGAAGATGCAAAGATCTCTCTTCTTCCTTGTCACCTAATCCTGCTGGCCTTCTCAGGCTCATCTGCAGAAGACCCCACTCAAAAGTAGGGTCTGGCCAGCTGCGGTGGCTCACGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGTGGGATCACCTGAGGTCAGGAGTTCAAGAACAGCCTGACCAATGTGGCGAAACCCTGTCTCTACTAAAAAATACCAAAATTAGCCAGGCGTAGTGGTGGGCGCCTATAACCCCATCTACTCGGGAGGCTGAGGCTGGAGAATAGCTTGAACCTGGGGGTTGAAGGTTGCAGTGAGTCAAGATGATGCCACTGCACTCCAGTCTGGGTGAAAGAGCAAAACTCCATCTCAAAAAAAAAAAAAAAAAGGGGGGGTCTTCTTCAGAAGATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5803446,-,1046S558M,60,2;\tMD:Z:516A337T154^A45G74\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1097\tXS:i:1029", true), + data.add(new 
ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm011675:tig00000\t2064\tchr7\t5803446\t60\t1046H558M\t*\t0\t0\tATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5802154,-,1009M1D120M475S,60,4;\tMD:Z:8G325C223\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:548\tXS:i:0", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm011675:tig00000\t16\tchr7\t5802154\t60\t1009M1D120M475S\t*\t0\t0\tATATTTTCTGAGATTTATAAGCTTTTTTAAAAAATTAAGGGCTGGGCACAGTGGTTCACACTTGTAAGTGCAGCACTTTGGGAGGCTAAGGCAGGAGGATTGATTGAGGTCAGGAGTTGGAGGCTACAGTGAGCTATGATTGCACCACTGCACTCTAGCCTGGGAGACAGAGTGAGACCCTGACTCAAACAGTAATTAAATCAATAAAATTTAGAAGTTAAGATTCTTCAGCCTCTTTTGGGCTGGGCATGGTCGCTCAAGCTTGTAATCCCAGCACTCTGGGAGGCTGTGGCAGGCTGATCACTTGAGGCCAGGAGTTTGAGACCAACCTGGCCAACATTGTGAAACCCCATCTCTACTAAAAATACAGAAATTAGCCGGTGTGGTGATGCATGCCTGTAATCCCAGCTGTTCAGGAGGCTGAGACAGGAGAATTGCTTGAACTCGGGAGGTGGAGGTTGCAGTGAGCTGAGAGCATGTCACTCTATTCCAGCCCGGGCGACAGAACTAGACTCCGTCTCAAAAAAAAAAAAAAAAAGATTCTTCAGTCTCTTTTGATCTTCCTGTGCCCACTTTATGGTGCCCGGAGCTGCTGATGTTCAGATTTGCCATGGGCGGTGCTCCCCTACATCTGAAGATGCAAAGATCTCTCTTCTTCCTTGTCACCTAATCCTGCTGGCCTTCTCAGGCTCATCTGCAGAAGACCCCACTCAAAAGTAGGGTCTGGCCAGCTGCGGTGGCTCACGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGTGGGATCACCTGAGGTCAGGAGTTCAAGAACAGCCTGACCAATGTGGCGAAACCCTGTCTCTACTAAAAAATACCAAAATTAGCCAGGCGTAGTGGTGGGCGCCTATAACCCCATCTACTCGGGAGGCTGAGGCTGGAGAATAGCTTGAACCTGGGGGTTGAAGGTTGCAGTGAGTCAAGATGATGCCACTGCACTCCAGTCTGGGTGAAAGAGCAAAACTCCATCTCAAAAAAAAAAAAAAAAAGGGGGGGTCTTC
TTCAGAAGATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5803446,-,1046S558M,60,2;\tMD:Z:516A337T154^A45G74\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1097\tXS:i:1029", true), 123, false, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr7", 5803446, 5804003), 1, 558, TextCigarCodec.decode("558M1046H"), false, 60, 2, 548, ContigAlignmentsModifier.AlnModType.NONE), new AlignmentInterval(new SimpleInterval("chr7", 5802154, 5803159), 599, 1604, TextCigarCodec.decode("598S1006M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL)) )); // INV55 '+'-rep - data.add(new 
ValidLocalData(SVTestUtils.fromSAMRecordString("asm000295:tig00009\t0\tchr1\t13235227\t60\t570M151S\t*\t0\t0\tGGATTACAGGCGTGAGCCACCGCGCCCGGCCGGGGGACTCTATCTCAAAAAAAAAAAAAAAAAATTCAGTAGTAAAACTTTTGGTTAGCAGGGCACGGCTGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCATGAGGTCAGGAGATCGACACCATCCTGGCTAACATGGTGAAACCGCATCTCTACTAAAAATAGAAAAAAAATTAGCCAGGCGTGGTGGCAGGTGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCGGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCAGTGAGCCAAGATCATGCCACTGCACTCCAGCCTCGGTGACAGAGCAAGACTCCGTCTCAAAAATAAAAAACAAAAAAAAACTTTCGGTTAGTGTAATCTAGTCTTCCCTGTAGATGTAGCTAATTTTATTTTATTTTTATTATTATTTTTATTGAGACAGAGTCTTCCTCTGTCTGCCAGACCGGAGTACAATGGTGCGATCTCGGCTCACTGCAACCTCCATCTCCCGAGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTGGCTGGGATTACAAATGTGCACCACCACGCTTTGCTAAGTTTTGTATTTTTACTAGAGACAGCGTTTCCCATGTTGCCCAGGCTGGTCTTGAACTTGTGATCTCTGGTGATCTGCCCACCTCGGCCTCCCAAAGTTGTTGGGAGTG\t*\tSA:Z:chr1,13384970,-,218M503S,60,1;\tMD:Z:531T0G0C4T1G7T15A5\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:535\tXS:i:0", true), - SVTestUtils.fromSAMRecordString("asm000295:tig00009\t2064\tchr1\t13384970\t60\t218M503H\t*\t0\t0\tCACTCCCAACAACTTTGGGAGGCCGAGGTGGGCAGATCACCAGAGATCACAAGTTCAAGACCAGCCTGGGCAACATGGGAAACGCTGTCTCTAGTAAAAATACAAAACTTAGCAAAGCGTGGTGGTGCACATTTGTAATCCCAGCCACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACTCGGGAGATGGAGGTTGCAGTGAGCCGAGATCGCACCA\t*\tSA:Z:chr1,13235227,+,570M151S,60,7;\tMD:Z:212A5\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:213\tXS:i:0", true), + data.add(new 
ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000295:tig00009\t0\tchr1\t13235227\t60\t570M151S\t*\t0\t0\tGGATTACAGGCGTGAGCCACCGCGCCCGGCCGGGGGACTCTATCTCAAAAAAAAAAAAAAAAAATTCAGTAGTAAAACTTTTGGTTAGCAGGGCACGGCTGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCATGAGGTCAGGAGATCGACACCATCCTGGCTAACATGGTGAAACCGCATCTCTACTAAAAATAGAAAAAAAATTAGCCAGGCGTGGTGGCAGGTGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCGGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCAGTGAGCCAAGATCATGCCACTGCACTCCAGCCTCGGTGACAGAGCAAGACTCCGTCTCAAAAATAAAAAACAAAAAAAAACTTTCGGTTAGTGTAATCTAGTCTTCCCTGTAGATGTAGCTAATTTTATTTTATTTTTATTATTATTTTTATTGAGACAGAGTCTTCCTCTGTCTGCCAGACCGGAGTACAATGGTGCGATCTCGGCTCACTGCAACCTCCATCTCCCGAGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTGGCTGGGATTACAAATGTGCACCACCACGCTTTGCTAAGTTTTGTATTTTTACTAGAGACAGCGTTTCCCATGTTGCCCAGGCTGGTCTTGAACTTGTGATCTCTGGTGATCTGCCCACCTCGGCCTCCCAAAGTTGTTGGGAGTG\t*\tSA:Z:chr1,13384970,-,218M503S,60,1;\tMD:Z:531T0G0C4T1G7T15A5\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:535\tXS:i:0", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000295:tig00009\t2064\tchr1\t13384970\t60\t218M503H\t*\t0\t0\tCACTCCCAACAACTTTGGGAGGCCGAGGTGGGCAGATCACCAGAGATCACAAGTTCAAGACCAGCCTGGGCAACATGGGAAACGCTGTCTCTAGTAAAAATACAAAACTTAGCAAAGCGTGGTGGTGCACATTTGTAATCCCAGCCACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACTCGGGAGATGGAGGTTGCAGTGAGCCGAGATCGCACCA\t*\tSA:Z:chr1,13235227,+,570M151S,60,7;\tMD:Z:212A5\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:213\tXS:i:0", true), 67, true, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 13235227, 13235729), 1, 503, TextCigarCodec.decode("503M218S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL), new AlignmentInterval(new SimpleInterval("chr1", 13384970, 13385187), 504, 721, TextCigarCodec.decode("503H218M"), false, 60, 1, 213, ContigAlignmentsModifier.AlnModType.NONE)) )); // INV55 '-'-rep - data.add(new 
ValidLocalData(SVTestUtils.fromSAMRecordString("asm001039:tig00012\t2048\tchr1\t81195731\t60\t143M145H\t*\t0\t0\tTCACTAAATTCAGTACATACTCAAGGAATGAGGAAGCAAGCTCCCCCCTCCTGGAGGAAGGAGTATCAAATATCTTGTAATAATTAATAAATATTTGAAGGTAGACATTTTGAGGCTATGCCGATATCCTGTTTTTCTTTAAA\t*\tSA:Z:chr1,81194519,-,148M140S,60,0;\tMD:Z:122A20\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:138\tXS:i:22", true), - SVTestUtils.fromSAMRecordString("asm001039:tig00012\t16\tchr1\t81194519\t60\t148M140S\t*\t0\t0\tTCTAAAAACTGCTAAGAAACTCAAGTTTTCTGAAGTGGTCTGTTCGGAGGTTTGTGCATTTTTCAGTGATCTGTAACAATGTATTTTTCTTCCCACCATCTACTGCAGGTGGGTTCACAGGTGGGACCTGAGGAGCATTTGGGGATTTAAAGAAAAACAGGATATCGGCATAGCCTCAAAATGTCTACCTTCAAATATTTATTAATTATTACAAGATATTTGATACTCCTTCCTCCAGGAGGGGGGAGCTTGCTTCCTCATTCCTTGAGTATGTACTGAATTTAGTGA\t*\tSA:Z:chr1,81195731,+,143M145S,60,1;\tMD:Z:148\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:148\tXS:i:19", true), + data.add(new ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm001039:tig00012\t2048\tchr1\t81195731\t60\t143M145H\t*\t0\t0\tTCACTAAATTCAGTACATACTCAAGGAATGAGGAAGCAAGCTCCCCCCTCCTGGAGGAAGGAGTATCAAATATCTTGTAATAATTAATAAATATTTGAAGGTAGACATTTTGAGGCTATGCCGATATCCTGTTTTTCTTTAAA\t*\tSA:Z:chr1,81194519,-,148M140S,60,0;\tMD:Z:122A20\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:138\tXS:i:22", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm001039:tig00012\t16\tchr1\t81194519\t60\t148M140S\t*\t0\t0\tTCTAAAAACTGCTAAGAAACTCAAGTTTTCTGAAGTGGTCTGTTCGGAGGTTTGTGCATTTTTCAGTGATCTGTAACAATGTATTTTTCTTCCCACCATCTACTGCAGGTGGGTTCACAGGTGGGACCTGAGGAGCATTTGGGGATTTAAAGAAAAACAGGATATCGGCATAGCCTCAAAATGTCTACCTTCAAATATTTATTAATTATTACAAGATATTTGATACTCCTTCCTCCAGGAGGGGGGAGCTTGCTTCCTCATTCCTTGAGTATGTACTGAATTTAGTGA\t*\tSA:Z:chr1,81195731,+,143M145S,60,1;\tMD:Z:148\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:148\tXS:i:19", true), 3, false, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 81195731, 81195873), 1, 143, TextCigarCodec.decode("143M145H"), true, 60, 1, 138, 
ContigAlignmentsModifier.AlnModType.NONE), new AlignmentInterval(new SimpleInterval("chr1", 81194519, 81194663), 144, 288, TextCigarCodec.decode("143S145M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL)) )); // INV33 '+'-rep - data.add(new ValidLocalData(SVTestUtils.fromSAMRecordString("asm000619:tig00014\t2064\tchr1\t34802227\t60\t149H126M\t*\t0\t0\tGGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTT\t*\tSA:Z:chr1,54510617,+,75S200M,60,0;\tMD:Z:126\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:126\tXS:i:0", true), - SVTestUtils.fromSAMRecordString("asm000619:tig00014\t0\tchr1\t54510617\t60\t75S200M\t*\t0\t0\tAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCCGAAAAACAAACTAAAAAACCCCACAAAAACAAACAAAACAAAATGCAGTGTATTTGGAGAGAACGGGTGTCTGGTTATTTTGTGCTTTGTATCAAGTTAGCCCAAAACTTAGTGGCATAAAACAATCAAGCATTGTGCTCGCAGATTAT\t*\tSA:Z:chr1,34802227,-,149S126M,60,0;\tMD:Z:200\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:200\tXS:i:22", true), + data.add(new ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000619:tig00014\t2064\tchr1\t34802227\t60\t149H126M\t*\t0\t0\tGGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTT\t*\tSA:Z:chr1,54510617,+,75S200M,60,0;\tMD:Z:126\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:126\tXS:i:0", true), + 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm000619:tig00014\t0\tchr1\t54510617\t60\t75S200M\t*\t0\t0\tAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCCGAAAAACAAACTAAAAAACCCCACAAAAACAAACAAAACAAAATGCAGTGTATTTGGAGAGAACGGGTGTCTGGTTATTTTGTGCTTTGTATCAAGTTAGCCCAAAACTTAGTGGCATAAAACAATCAAGCATTGTGCTCGCAGATTAT\t*\tSA:Z:chr1,34802227,-,149S126M,60,0;\tMD:Z:200\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:200\tXS:i:22", true), 51, false, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 34802227, 34802352), 1, 126, TextCigarCodec.decode("126M149H"), false, 60, 0, 126, ContigAlignmentsModifier.AlnModType.NONE), new AlignmentInterval(new SimpleInterval("chr1", 54510668, 54510816), 127, 275, TextCigarCodec.decode("126S149M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL)) )); // INV33 '-'-rep - data.add(new ValidLocalData(SVTestUtils.fromSAMRecordString("asm016915:tig00033\t16\tchr10\t73660151\t60\t140S145M\t*\t0\t0\tCCTTTCTCTGGATCTCATCTGGAATTGGCCTCACAGTTTGTGAACAGCCCAGAAATAGCCAATGTCTTAGCTTTGGAACGTTTGCCACTTTCCAGCTACAGCTGGACCTTGTATCCTGGTTTGGTTCCTGATGACTTTCAAAGAACAAATGGCTTCCAGCAAGAGAAAAAGGGGATGCAACATTTTTACAAATTATTTCTTTTTTTATTAAAAAAATTTTAAGTTAAATGCTAAAGATATGTTTAACCTCTCTGATACTGACTTGCTCATGAGAAGAAAGAGAGA\t*\tSA:Z:chr10,73659956,+,143S142M,60,0;\tMD:Z:145\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:145\tXS:i:74", true), - SVTestUtils.fromSAMRecordString("asm016915:tig00033\t2048\tchr10\t73659956\t60\t143H142M\t*\t0\t0\tTTTGAAAGTCATCAGGAACCAAACCAGGATACAAGGTCCAGCTGTAGCTGGAAAGTGGCAAACGTTCCAAAGCTAAGACATTGGCTATTTCTGGGCTGTTCACAAACTGTGAGGCCAATTCCAGATGAGATCCAGAGAAAGG\t*\tSA:Z:chr10,73660151,-,140S145M,60,0;\tMD:Z:142\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:142\tXS:i:112", true), + data.add(new 
ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm016915:tig00033\t16\tchr10\t73660151\t60\t140S145M\t*\t0\t0\tCCTTTCTCTGGATCTCATCTGGAATTGGCCTCACAGTTTGTGAACAGCCCAGAAATAGCCAATGTCTTAGCTTTGGAACGTTTGCCACTTTCCAGCTACAGCTGGACCTTGTATCCTGGTTTGGTTCCTGATGACTTTCAAAGAACAAATGGCTTCCAGCAAGAGAAAAAGGGGATGCAACATTTTTACAAATTATTTCTTTTTTTATTAAAAAAATTTTAAGTTAAATGCTAAAGATATGTTTAACCTCTCTGATACTGACTTGCTCATGAGAAGAAAGAGAGA\t*\tSA:Z:chr10,73659956,+,143S142M,60,0;\tMD:Z:145\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:145\tXS:i:74", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm016915:tig00033\t2048\tchr10\t73659956\t60\t143H142M\t*\t0\t0\tTTTGAAAGTCATCAGGAACCAAACCAGGATACAAGGTCCAGCTGTAGCTGGAAAGTGGCAAACGTTCCAAAGCTAAGACATTGGCTATTTCTGGGCTGTTCACAAACTGTGAGGCCAATTCCAGATGAGATCCAGAGAAAGG\t*\tSA:Z:chr10,73660151,-,140S145M,60,0;\tMD:Z:142\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:142\tXS:i:112", true), 2, true, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr10", 73660153, 73660295), 1, 143, TextCigarCodec.decode("143M142S"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL), new AlignmentInterval(new SimpleInterval("chr10", 73659956, 73660097), 144, 285, TextCigarCodec.decode("143H142M"), true, 60, 0, 142, ContigAlignmentsModifier.AlnModType.NONE)) )); // invdup one yield to two - data.add(new 
ValidLocalData(SVTestUtils.fromSAMRecordString("asm031213:tig00068\t16\tchrUn_GL000195v1\t49574\t48\t125S270M\t*\t0\t0\tCCAACATGAAGAAACCCCGTCTCTACTAAAAATACAAAATTATCCAGGTGTGGTGTTGTATGCCTGTAATCCTAGCTACTCGGGAGGCCGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAGGTTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTAATGTTGGTCAGGCTGGTCTCATACTCCTGATCTCAGATCATCTGCCCACCTAGGCCTCCCAAAATGCAGGGATTACAGGCATGAGTCACAATGCCCGGCTGTAATTCCCTCTCTTTTATACCTTAGATTTGAATAATTTTTGCTGGATTCTTCAAACATGAAGTATTTTTTGAATTGGAAACTAACTGAATGACTAACTGGTAAGTAG\t*\tSA:Z:chrUn_GL000195v1,49512,+,264S131M,48,1;\tMD:Z:61C21A14G69T70A30\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:245\tXS:i:216", true), - SVTestUtils.fromSAMRecordString("asm031213:tig00068\t2048\tchrUn_GL000195v1\t49512\t48\t264H131M\t*\t0\t0\tCTGTAACCTCCACCTCCTGGGTTCAAGCGATTCTCCTGCCTCGGCCTCCCGAGTAGCTAGGATTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTCATGTTGG\t*\tSA:Z:chrUn_GL000195v1,49574,-,125S270M,48,5;\tMD:Z:20T110\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:126\tXS:i:62", true), + data.add(new ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm031213:tig00068\t16\tchrUn_GL000195v1\t49574\t48\t125S270M\t*\t0\t0\tCCAACATGAAGAAACCCCGTCTCTACTAAAAATACAAAATTATCCAGGTGTGGTGTTGTATGCCTGTAATCCTAGCTACTCGGGAGGCCGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAGGTTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTAATGTTGGTCAGGCTGGTCTCATACTCCTGATCTCAGATCATCTGCCCACCTAGGCCTCCCAAAATGCAGGGATTACAGGCATGAGTCACAATGCCCGGCTGTAATTCCCTCTCTTTTATACCTTAGATTTGAATAATTTTTGCTGGATTCTTCAAACATGAAGTATTTTTTGAATTGGAAACTAACTGAATGACTAACTGGTAAGTAG\t*\tSA:Z:chrUn_GL000195v1,49512,+,264S131M,48,1;\tMD:Z:61C21A14G69T70A30\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:245\tXS:i:216", true), + 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm031213:tig00068\t2048\tchrUn_GL000195v1\t49512\t48\t264H131M\t*\t0\t0\tCTGTAACCTCCACCTCCTGGGTTCAAGCGATTCTCCTGCCTCGGCCTCCCGAGTAGCTAGGATTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTCATGTTGG\t*\tSA:Z:chrUn_GL000195v1,49574,-,125S270M,48,5;\tMD:Z:20T110\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:126\tXS:i:62", true), 6, true, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chrUn_GL000195v1", 49580, 49843), 1, 264, TextCigarCodec.decode("264M131S"), false, 48, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL), new AlignmentInterval(new SimpleInterval("chrUn_GL000195v1", 49512, 49642), 265, 395, TextCigarCodec.decode("264H131M"), true, 48, 1, 126, ContigAlignmentsModifier.AlnModType.NONE)) )); // invdup two yield to one - data.add(new ValidLocalData(SVTestUtils.fromSAMRecordString("asm030182:tig00002\t2048\tchrX\t52729025\t46\t120M249H\t*\t0\t0\tGATAGCATTAGGAGATATACTTAATGAGAAATGACAAGTTAATGGGTGCAGCATACCAACATGGCACATGTATACATATGTAACAAACCTGCACATTGTACACATGTACCCTAAAACTTA\t*\tSA:Z:chrX,52729135,-,261M108S,60,0;\tMD:Z:53C66\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:115\tXS:i:95", true), - SVTestUtils.fromSAMRecordString("asm030182:tig00002\t16\tchrX\t52729135\t60\t261M108S\t*\t0\t0\tCTAAAACTTAAAGTACAATAATAATAAAATTTAAAAAAATGTTTTCAAGGATCAATTCTTAACAGTAGAGGAAAATAGGAAAGCGTGTCAGTGGTCCACCAGAAATATTGAGGCATTCCTGGGAGATAGAGTAGATGGGGTCAGACTGATAGAGAAACCCAAGGAGACAAGACCACAGCTCAAATCACTGTAGGCGAGAGATGCTGTTTGTTTTTTGAGACGGAGACTTACTCTGTCGCCCAGGCTGAGTAAGTTTTAGGGTACATGTGTACAATGTGCAGGTTTGTTACATATGTATACATGTGCCATGTTGGTATGCTGCACCCATTAACTTGTCATTTCTCATTAAGTATATCTCCTAATGCTATC\t*\tSA:Z:chrX,52729025,+,120M249S,46,1;\tMD:Z:261\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:261\tXS:i:187", true), + data.add(new 
ValidLocalData(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm030182:tig00002\t2048\tchrX\t52729025\t46\t120M249H\t*\t0\t0\tGATAGCATTAGGAGATATACTTAATGAGAAATGACAAGTTAATGGGTGCAGCATACCAACATGGCACATGTATACATATGTAACAAACCTGCACATTGTACACATGTACCCTAAAACTTA\t*\tSA:Z:chrX,52729135,-,261M108S,60,0;\tMD:Z:53C66\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:115\tXS:i:95", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm030182:tig00002\t16\tchrX\t52729135\t60\t261M108S\t*\t0\t0\tCTAAAACTTAAAGTACAATAATAATAAAATTTAAAAAAATGTTTTCAAGGATCAATTCTTAACAGTAGAGGAAAATAGGAAAGCGTGTCAGTGGTCCACCAGAAATATTGAGGCATTCCTGGGAGATAGAGTAGATGGGGTCAGACTGATAGAGAAACCCAAGGAGACAAGACCACAGCTCAAATCACTGTAGGCGAGAGATGCTGTTTGTTTTTTGAGACGGAGACTTACTCTGTCGCCCAGGCTGAGTAAGTTTTAGGGTACATGTGTACAATGTGCAGGTTTGTTACATATGTATACATGTGCCATGTTGGTATGCTGCACCCATTAACTTGTCATTTCTCATTAAGTATATCTCCTAATGCTATC\t*\tSA:Z:chrX,52729025,+,120M249S,46,1;\tMD:Z:261\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:261\tXS:i:187", true), 12, false, null, new Tuple2<>(new AlignmentInterval(new SimpleInterval("chrX", 52729025, 52729144), 1, 120, TextCigarCodec.decode("120M249H"), true, 46, 1, 115, ContigAlignmentsModifier.AlnModType.NONE), new AlignmentInterval(new SimpleInterval("chrX", 52729135, 52729383), 121, 369, TextCigarCodec.decode("120S249M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL)) @@ -143,8 +142,8 @@ private Object[][] forOverlapYieldingStrategy() { data.add(new Object[]{two, one, true, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict, IllegalArgumentException.class}); - data.add(new Object[]{SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true), - 
SVTestUtils.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATC
TCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true), + data.add(new Object[]{SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true), + 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAA
TTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true), true, null, IllegalArgumentException.class }); @@ -195,8 +194,8 @@ private Object[][] forRemoveOverlap() { data.add(new Object[]{one, two, 30, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict, false, false, null, IllegalArgumentException.class}); // not-overlapping - data.add(new 
Object[]{SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), - 
SVTestUtils.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATC
TCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393H50M1085H,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true), + data.add(new Object[]{SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCT
TGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACAT
GAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393H50M1085H,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true), 0, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict, false, false, null, IllegalArgumentException.class }); @@ -239,7 +238,7 @@ public void testRemoveOverlap(final AlignmentInterval one, final AlignmentInterv private Object[][] forTestDeOverlapAlignments() { final List data = new ArrayList<>(20); - final AlignedContig contigOne = 
SVTestUtils.fromPrimarySAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGG
ATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425H377M1D726M,60,4,1071;chr1,202317371,-,1393H50M1085H,60,1,45;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", + final AlignedContig contigOne = SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTT
GAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425H377M1D726M,60,4,1071;chr1,202317371,-,1393H50M1085H,60,1,45;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true); final AlignmentInterval middleForContigOne = new AlignmentInterval( new SimpleInterval("chr1", 202317371, 202317402), @@ -248,14 +247,14 @@ private Object[][] forTestDeOverlapAlignments() { 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); data.add(new 
Object[]{contigOne.getAlignments(), - Arrays.asList(SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), + 
Arrays.asList(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true), middleForContigOne, - 
SVTestUtils.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTATCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATC
TCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393H50M1085H,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true) + SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004677:tig00000\t16\tchr3\t15736242\t60\t1282M1246S\t*\t0\t0\tTTATCTCTTACTGTGCCTTAATTATAAATTAAATTTTATCATCGGTATGTATAGGAAAAAAACACAGTATCTATAGGGTTTGGTACTATCCAATGTTTCAGGCATCCATAGGGGGTCCAGGAACATATCCTCTGTAGATAAGTGGGAACTACTTTGAAAAAGACATGGCAGCCACTAGCTCCCAGCCACAGAATCTTGGGAATGTTACTTTACCTATAAGTTCCTACCCCTGCAACAAAAATGAAAGACTAGACAAGTTGGCTTCAAAGGTTCTTCTGTTACCTTCTCTAAATCTTTGGTGTACACATAAACCCCAATCTTTGCTATGACACCTAGCACAGACAACGTTTGTTGTGTAAGATAACTAAATAGCAATTTTCTTAAATTGTTTTGATGGTTCTGTCACTAAAGTATTATCTTGACTAAATGTTAATAAACTACTAAAAAGCATGTTTTCTAAATTCTGCCTTCCTTTGCTATTTACTTCTGGAAGAACAATAACATATCTCTGTCTTCACATATTTTTCCTGCCACAATAATAGAACATACTTTTCCTGTTGTGTTAAGAGTTGGTATTGTTTTCCTTGGTCACCACAGGTTAACTGTGCACAAGTAGTATCTACTACATACATTTCTAAATTTGAGAAAGTTATAAAAGGAGGCAAAATTAGACAAAAATGAGAAAGATAAATAGTTCTGTATGCCTTTACTATGCAAAAATGCAAGTTCTTTAATAAGTGGGGGGTGGACAGGAGGAGAGAAAAATAATGGTCTAATTGAATGCCACCTACCTCACCATGTCCACTGAAAGCTGCATGATGTAATGCAGTCCTCCCTGCTCGATCAGATACGTTTACATTACTCAGAAGAGGTACCAAAGCTTCAGCACACTTTACAGCTTTATTAGCAGCAGCTATATGTAAAGGGGTTTGCCAATTTTTGTCTCGAGCATTAACATCTGCAGAATGCTTCAAAAGTACCTGAACTGCTTCCTAAAACATATGAAAAGTTATAAAAGACAAATGAGTTAAGAGTTTGAGAAATTATTTCTATATATAGTGTGCGTGTGTGTGTATAAATATGTA
TCTACATATGAAGAAAATAAAAAAGCTCTCATAGAACTCACTATAAACAAGTAGGAATTTACCCAAGGAAAGAAAATATTTTGAATGTAAAGCGATACTGAGAGTACTCAGAGCCATATAAGATGCATCAAGATCAAACATTCCCTTTCCCTACCCTACCATTAAATGCATAAACATTCCTATCAAAATGTGCAAGAAGAAATACTGACAACTATTATTCAAAAACATTCATTTCTTATATCTTATTTTTCTAGGTAACAAGATACAATTTATACTTTGAATAAATAGTTTTTTGGTTTTGTTTTTTGGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15737523,-,1425S377M1D726M,60,4;chr1,202317371,-,1393H50M1085H,60,1;\tMD:Z:1282\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1282\tXS:i:0", true) ) }); - final AlignedContig contigTwo = 
SVTestUtils.fromPrimarySAMRecordString("asm004901:tig00000\t0\tchr3\t44699487\t60\t786S1319M692S\t*\t0\t0\tGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCATGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCTGGCCTGGGCAACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAGAAAATGGCTTTGGAAACTGGATTTTTGTTGTAGACTTGATAATGTTTCCCAATGAATTACAAGGCAAAACAGTTCTTATATGTGAAACTTATACTGTAGTAAAGTTATTCTGACAACTGACATAGTCTGAATCACAAGCAAAATCAAGCTGCTCTATATGCTTTTTTTGGCTATTGAAAGTTAAAAAAAGAAGTGAGATCTTCACTCCTATCCAAACTGGCTGCAGGCATGTTTTTTGAACTCAAATCACAGTTCCAGAAGTAGTTTTCAGATCACAACTTGATGCAAGTGCAAAGGAAATAAGAAAATTTCCTTCCTCCCTCCCTCCCTTCCTCCTTCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCCCTCCCTCCCTTCCTTCCTTCTTTCCTTCTTTCCTCTCTATCTCTTTTTCTTTCTTGCTTTGTGGAGACCAGGTCTTGCCATTTTGCCTAGGCTGGTCTTGAACTCCTAGGCTTAAGTGATTCTCCCTCCTCAGCCTCCCAAAGAGAAATTTTCATATTTACAAATCCATTTAAGAGGCAATTGAAAAAACATTTAAATTTTAATTTTAAAAAAGAGGCAGTGGAGTGGCCGGGTGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAAGCCAAGGGGGGCAGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAAAATACAAAAAAATTAGCCAGGTGTGGTGGTGGATGCCTGCAGTCCCAGCTACTCTGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAATGAGCTGAGACTGAGCCACTGCACTACAGCCTGGGCAACAGAGCGAGACTGCATCTCAACAACAACAACAAAAAAACGCAGTGGAAAAGTTTCCACAAACCTTTGTTGGGGAAATATAATTAAAAACAAATCCTTCTTCCAACCCAGAAATTCTCTCTACAAAGGTAGCAGGGAAAGAATACACTATTAAAGAAGCATTAAACTAGAATGTAACACATATCACAGGCAACCTACAAAGAGATTGAAAAGGCAGAAAGAAATCTCGCCTCCTTATATAGCCGAACAGATACAACCCATTACATACATGCTTTCAAGATAAACAATAACTAGTCTTCAAATAAGAGGTCTTGGCAGCACCTTTTGTCACATGGTTTTTCCCAACTTTATTGTGGTAATTGGAGTGACCATCTATGTTAGCTAATTGGCTTTATCCAGAGGAAAATCTTCTCAACTTTTTATGGCAGAGGTAGTTTTGCAACTTCAAGCAGGGCACCCACCGAAGCTAAGCTCTAATTTTTGCTATTCAAAGAGATGGCTTTCAGGTCCTCGAGAAAAACATTCCTTGGTCATGGAGCTGAGAAAAGGCCAGTTGGCAAAAGTCCTGCCTAGTCTTCAAAAGCCTATATATACATATGTATTTCAAAGAAAAGAGAAAATACTTACAAGTTTTCTAAGGTAAATGTCCTGAGAAAAAGGAGGGGAGGGAAATCTCTTCTCCTGTTTTCACCAGAGTAAATTGAACCTCTTATTTTTAATTTGCATTTGCCCTTATATTTTCTATGTGAAAACACATTTTCTTTTTTCTTCTCTTTTCTTTTCTTTTTTTTTTTTGACGGAGTCTCCCTCAGCCACCCAGGCTGGAGTGCAGTGGCATGATCTTGGCTCACCA
CAACCACCATCTCCTGGGTTCAAGTGATTCTCCCATCTCAGCCTCCCGAGTAGCTGGGATTACAGGCACCCGCCATCATGCCCGGCTAATTTTTGTATTTTAGTAGAGACAGGGTTTCACCTTGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCATCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGTTTGAGCCACCGTGCCCAGCCAAAACACATTTTCTGGTTTGTTTGTTTTTGTAATTTAAGACCAAATAATTTATTTTGGGAAATTTATTGTGAATAGACATTAAAAATAGGTTCATTTGTGCAAAGTAATCTAAACAATGAATGTTTGTGCATATATATATATATATATATATACTGATATATATATACGCACATATACACATACATAACACAATTCTGTATGGGAAATTCATTAAACAACGAGTAGAGTCAACAGTTTGAGTCTCTCAATAACTTAGTAAAAAGCAATATTGATCTTTATAAAAATAATTTTAAAACTTTTTTCTGTTTGTGAAAAATAAAATATCAATTTGAAAATTACTGTCAGACCATGAAGGCAAATAAGAAGTAATATAATCTATCTTCATTTATAGATGAGGAAGAAAGTGACTCCAGAAAAGTTAAATAAAACTTTCTTAAAATTATGTAGGCAGTTATAGTGGCAGAGCTGGGTTATTAATCTGTTACTCCTCACTTACAGTTCAGTGGTTTGGTTTTTTTGTTTGTTTGTTTTGTTTATTCTGCTCCTACTGTTTCCATCTCAGCCTTAACATTTCCTTTTATTCTTCATTTGAGCTAAGCATTCTTTTCTTCTAAAGAAGACCATCGACTTTATTTGAATCATAAGTCACACACACAAACACACACACACACACACACACACA\t*\tSA:Z:chr3,44700691,-,1898H58M2D292M12D387M1I161M,60,26,780;chr3,44698789,-,530M6D217M2I58M1990H,60,19,710,;\tMD:Z:10A0A1295C0G10\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1299\tXS:i:0", true); + final AlignedContig contigTwo = 
SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm004901:tig00000\t0\tchr3\t44699487\t60\t786S1319M692S\t*\t0\t0\tGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCATGAACCCGGGAAGCGGAGCTTGCAGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCTGGCCTGGGCAACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAGAAAATGGCTTTGGAAACTGGATTTTTGTTGTAGACTTGATAATGTTTCCCAATGAATTACAAGGCAAAACAGTTCTTATATGTGAAACTTATACTGTAGTAAAGTTATTCTGACAACTGACATAGTCTGAATCACAAGCAAAATCAAGCTGCTCTATATGCTTTTTTTGGCTATTGAAAGTTAAAAAAAGAAGTGAGATCTTCACTCCTATCCAAACTGGCTGCAGGCATGTTTTTTGAACTCAAATCACAGTTCCAGAAGTAGTTTTCAGATCACAACTTGATGCAAGTGCAAAGGAAATAAGAAAATTTCCTTCCTCCCTCCCTCCCTTCCTCCTTCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCCCTCCCTCCCTTCCTTCCTTCTTTCCTTCTTTCCTCTCTATCTCTTTTTCTTTCTTGCTTTGTGGAGACCAGGTCTTGCCATTTTGCCTAGGCTGGTCTTGAACTCCTAGGCTTAAGTGATTCTCCCTCCTCAGCCTCCCAAAGAGAAATTTTCATATTTACAAATCCATTTAAGAGGCAATTGAAAAAACATTTAAATTTTAATTTTAAAAAAGAGGCAGTGGAGTGGCCGGGTGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAAGCCAAGGGGGGCAGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAAAATACAAAAAAATTAGCCAGGTGTGGTGGTGGATGCCTGCAGTCCCAGCTACTCTGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAATGAGCTGAGACTGAGCCACTGCACTACAGCCTGGGCAACAGAGCGAGACTGCATCTCAACAACAACAACAAAAAAACGCAGTGGAAAAGTTTCCACAAACCTTTGTTGGGGAAATATAATTAAAAACAAATCCTTCTTCCAACCCAGAAATTCTCTCTACAAAGGTAGCAGGGAAAGAATACACTATTAAAGAAGCATTAAACTAGAATGTAACACATATCACAGGCAACCTACAAAGAGATTGAAAAGGCAGAAAGAAATCTCGCCTCCTTATATAGCCGAACAGATACAACCCATTACATACATGCTTTCAAGATAAACAATAACTAGTCTTCAAATAAGAGGTCTTGGCAGCACCTTTTGTCACATGGTTTTTCCCAACTTTATTGTGGTAATTGGAGTGACCATCTATGTTAGCTAATTGGCTTTATCCAGAGGAAAATCTTCTCAACTTTTTATGGCAGAGGTAGTTTTGCAACTTCAAGCAGGGCACCCACCGAAGCTAAGCTCTAATTTTTGCTATTCAAAGAGATGGCTTTCAGGTCCTCGAGAAAAACATTCCTTGGTCATGGAGCTGAGAAAAGGCCAGTTGGCAAAAGTCCTGCCTAGTCTTCAAAAGCCTATATATACATATGTATTTCAAAGAAAAGAGAAAATACTTACAAGTTTTCTAAGGTAAATGTCCTGAGAAAAAGGAGGGGAGGGAAATCTCTTCTCCTGTTTTCACCAGAGTAAATTGAACCTCTTATTTTTAATTTGCATTTGCCCTTATATTTTCTATGTGAAAACACATTTTCTTTTTTCTTCTCTTTTCTTTTCTTTTTTTTTTTTGACGGAGTCTCCCTCAGCCACCCAGGCT
GGAGTGCAGTGGCATGATCTTGGCTCACCACAACCACCATCTCCTGGGTTCAAGTGATTCTCCCATCTCAGCCTCCCGAGTAGCTGGGATTACAGGCACCCGCCATCATGCCCGGCTAATTTTTGTATTTTAGTAGAGACAGGGTTTCACCTTGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCATCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGTTTGAGCCACCGTGCCCAGCCAAAACACATTTTCTGGTTTGTTTGTTTTTGTAATTTAAGACCAAATAATTTATTTTGGGAAATTTATTGTGAATAGACATTAAAAATAGGTTCATTTGTGCAAAGTAATCTAAACAATGAATGTTTGTGCATATATATATATATATATATATACTGATATATATATACGCACATATACACATACATAACACAATTCTGTATGGGAAATTCATTAAACAACGAGTAGAGTCAACAGTTTGAGTCTCTCAATAACTTAGTAAAAAGCAATATTGATCTTTATAAAAATAATTTTAAAACTTTTTTCTGTTTGTGAAAAATAAAATATCAATTTGAAAATTACTGTCAGACCATGAAGGCAAATAAGAAGTAATATAATCTATCTTCATTTATAGATGAGGAAGAAAGTGACTCCAGAAAAGTTAAATAAAACTTTCTTAAAATTATGTAGGCAGTTATAGTGGCAGAGCTGGGTTATTAATCTGTTACTCCTCACTTACAGTTCAGTGGTTTGGTTTTTTTGTTTGTTTGTTTTGTTTATTCTGCTCCTACTGTTTCCATCTCAGCCTTAACATTTCCTTTTATTCTTCATTTGAGCTAAGCATTCTTTTCTTCTAAAGAAGACCATCGACTTTATTTGAATCATAAGTCACACACACAAACACACACACACACACACACACACA\t*\tSA:Z:chr3,44700691,-,1898H58M2D292M12D387M1I161M,60,26,780;chr3,44698789,-,530M6D217M2I58M1990H,60,19,710,;\tMD:Z:10A0A1295C0G10\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1299\tXS:i:0", true); final AlignmentInterval middleForContigTwo = new AlignmentInterval( new SimpleInterval("chr3", 44699600, 44700690), 900, 1990, @@ -263,9 +262,9 @@ private Object[][] forTestDeOverlapAlignments() { 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); data.add(new Object[]{contigTwo.getAlignments(), - 
Arrays.asList(SVTestUtils.fromSAMRecordString("asm004901:tig00000\t2064\tchr3\t44700691\t60\t1898H58M2D292M12D387M1I161M\t*\t0\t0\tTTTTAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCCCCCTTGGCTTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCCGGCCACTCCACTGCCTCTTTTTTAAAATTAAAATTTAAATGTTTTTTCAATTGCCTCTTAAATGGATTTGTAAATATGAAAATTTCTCTTTGGGAGGCTGAGGAGGGAGAATCACTTAAGCCTAGGAGTTCAAGACCAGCCTAGGCAAAATGGCAAGACCTGGTCTCCACAAAGCAAGAAAGAAAAAGAGATAGAGAGGAAAGAAGGAAAGAAGGAAGGAAGGGAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGGAAGGAGGAAGGGAGGGAGGGAGGAAGGAAATTTTCTTATTTCCTTTGCACTTGCATCAAGTTGTGATCTGAAAACTACTTCTGGAACTGTGATTTGAGTTCAAAAAACATGCCTGCAGCCAGTTTGGATAGGAGTGAAGATCTCACTTCTTTTTTTAACTTTCAATAGCCAAAAAAAGCATATAGAGCAGCTTGATTTTGCTTGTGATTCAGACTATGTCAGTTGTCAGAATAACTTTACTACAGTATAAGTTTCACATATAAGAACTGTTTTGCCTTGTAATTCATTGGGAAACATTATCAAGTCTACAACAAAAATCCAGTTTCCAAAGCCATTTTCTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTTGCCCAGGCCAGACTGCGGACTGCAGTGGCGCAATCTCGGCTCACTGCAAGCTCCGCTTCCCGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACC\t*\tSA:Z:chr3,44699487,+,786S1319M692S,60,4;chr3,44698789,-,530M6D217M2I58M1990S,60,19;\tMD:Z:13A10T4G6C6T2A11^AG6C0A0T2A7C272^AGGAAGGAAGGA548\tRG:Z:GATKSVContigAlignments\tNM:i:26\tAS:i:780\tXS:i:0", true), + 
Arrays.asList(SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004901:tig00000\t2064\tchr3\t44700691\t60\t1898H58M2D292M12D387M1I161M\t*\t0\t0\tTTTTAGTAGAGACGGGGTTTCACCATGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCTGCCCCCCTTGGCTTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCCGGCCACTCCACTGCCTCTTTTTTAAAATTAAAATTTAAATGTTTTTTCAATTGCCTCTTAAATGGATTTGTAAATATGAAAATTTCTCTTTGGGAGGCTGAGGAGGGAGAATCACTTAAGCCTAGGAGTTCAAGACCAGCCTAGGCAAAATGGCAAGACCTGGTCTCCACAAAGCAAGAAAGAAAAAGAGATAGAGAGGAAAGAAGGAAAGAAGGAAGGAAGGGAGGGAGGGAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGGAAGGAGGAAGGGAGGGAGGGAGGAAGGAAATTTTCTTATTTCCTTTGCACTTGCATCAAGTTGTGATCTGAAAACTACTTCTGGAACTGTGATTTGAGTTCAAAAAACATGCCTGCAGCCAGTTTGGATAGGAGTGAAGATCTCACTTCTTTTTTTAACTTTCAATAGCCAAAAAAAGCATATAGAGCAGCTTGATTTTGCTTGTGATTCAGACTATGTCAGTTGTCAGAATAACTTTACTACAGTATAAGTTTCACATATAAGAACTGTTTTGCCTTGTAATTCATTGGGAAACATTATCAAGTCTACAACAAAAATCCAGTTTCCAAAGCCATTTTCTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTTGCCCAGGCCAGACTGCGGACTGCAGTGGCGCAATCTCGGCTCACTGCAAGCTCCGCTTCCCGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACC\t*\tSA:Z:chr3,44699487,+,786S1319M692S,60,4;chr3,44698789,-,530M6D217M2I58M1990S,60,19;\tMD:Z:13A10T4G6C6T2A11^AG6C0A0T2A7C272^AGGAAGGAAGGA548\tRG:Z:GATKSVContigAlignments\tNM:i:26\tAS:i:780\tXS:i:0", true), middleForContigTwo, - 
SVTestUtils.fromSAMRecordString("asm004901:tig00000\t2064\tchr3\t44698789\t60\t530M6D217M2I58M1990H\t*\t0\t0\tTGTGTGTGTGTGTGTGTGTGTGTGTTTGTGTGTGTGACTTATGATTCAAATAAAGTCGATGGTCTTCTTTAGAAGAAAAGAATGCTTAGCTCAAATGAAGAATAAAAGGAAATGTTAAGGCTGAGATGGAAACAGTAGGAGCAGAATAAACAAAACAAACAAACAAAAAAACCAAACCACTGAACTGTAAGTGAGGAGTAACAGATTAATAACCCAGCTCTGCCACTATAACTGCCTACATAATTTTAAGAAAGTTTTATTTAACTTTTCTGGAGTCACTTTCTTCCTCATCTATAAATGAAGATAGATTATATTACTTCTTATTTGCCTTCATGGTCTGACAGTAATTTTCAAATTGATATTTTATTTTTCACAAACAGAAAAAAGTTTTAAAATTATTTTTATAAAGATCAATATTGCTTTTTACTAAGTTATTGAGAGACTCAAACTGTTGACTCTACTCGTTGTTTAATGAATTTCCCATACAGAATTGTGTTATGTATGTGTATATGTGCGTATATATATATCAGTATATATATATATATATATATATGCACAAACATTCATTGTTTAGATTACTTTGCACAAATGAACCTATTTTTAATGTCTATTCACAATAAATTTCCCAAAATAAATTATTTGGTCTTAAATTACAAAAACAAACAAACCAGAAAATGTGTTTTGGCTGGGCACGGTGGCTCAAACCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGATGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACAAGGTGAAACCCTGTCTCTACTAAAA\t*\tSA:Z:chr3,44699487,+,786S1319M692S,60,4;chr3,44700691,-,1898S58M2D292M12D387M1I161M,60,26;\tMD:Z:530^TATATA197A7G2G0C0A17A2G6T6T4T10C13\tRG:Z:GATKSVContigAlignments\tNM:i:19\tAS:i:710\tXS:i:0", true) + 
SVDiscoveryTestUtilsAndCommonDataProvider.fromSAMRecordString("asm004901:tig00000\t2064\tchr3\t44698789\t60\t530M6D217M2I58M1990H\t*\t0\t0\tTGTGTGTGTGTGTGTGTGTGTGTGTTTGTGTGTGTGACTTATGATTCAAATAAAGTCGATGGTCTTCTTTAGAAGAAAAGAATGCTTAGCTCAAATGAAGAATAAAAGGAAATGTTAAGGCTGAGATGGAAACAGTAGGAGCAGAATAAACAAAACAAACAAACAAAAAAACCAAACCACTGAACTGTAAGTGAGGAGTAACAGATTAATAACCCAGCTCTGCCACTATAACTGCCTACATAATTTTAAGAAAGTTTTATTTAACTTTTCTGGAGTCACTTTCTTCCTCATCTATAAATGAAGATAGATTATATTACTTCTTATTTGCCTTCATGGTCTGACAGTAATTTTCAAATTGATATTTTATTTTTCACAAACAGAAAAAAGTTTTAAAATTATTTTTATAAAGATCAATATTGCTTTTTACTAAGTTATTGAGAGACTCAAACTGTTGACTCTACTCGTTGTTTAATGAATTTCCCATACAGAATTGTGTTATGTATGTGTATATGTGCGTATATATATATCAGTATATATATATATATATATATATGCACAAACATTCATTGTTTAGATTACTTTGCACAAATGAACCTATTTTTAATGTCTATTCACAATAAATTTCCCAAAATAAATTATTTGGTCTTAAATTACAAAAACAAACAAACCAGAAAATGTGTTTTGGCTGGGCACGGTGGCTCAAACCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGATGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACAAGGTGAAACCCTGTCTCTACTAAAA\t*\tSA:Z:chr3,44699487,+,786S1319M692S,60,4;chr3,44700691,-,1898S58M2D292M12D387M1I161M,60,26;\tMD:Z:530^TATATA197A7G2G0C0A17A2G6T6T4T10C13\tRG:Z:GATKSVContigAlignments\tNM:i:19\tAS:i:710\tXS:i:0", true) ) }); @@ -285,12 +284,12 @@ public void testDeOverlapAlignments(final List alignments, @Test(groups = "sv") public void testTurnIntoVariantContext() throws IOException { - final CpxVariantInducingAssemblyContig tig13846_3 = new CpxVariantInducingAssemblyContig(new 
AssemblyContigWithFineTunedAlignments(SVTestUtils.fromPrimarySAMRecordString("asm013846:tig00003\t0\tchr20\t54849615\t60\t192S49M2I55M1I50M1I246M\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCCTGAGTAAAAGCTTCCTAAGGCCTCACCAGAAGCCGAGCAGATGCTGTTGCCATGCTTGTAGTCTGCAGAACCATAACCCAAATAAACCCAAGTTTTTATAAATTACCCAGCTTCAGGTATTCCTTGATAGCAACGCAAAATGGATTAACACCTAACCACAGGTGCCCACAGCTGGAACTTGCTCCTTGCCTTATGCTTTGTTGACATTTTTCCCTTCCCTGATTTGCTTTCTTCACTTTCCTCACTCTCTCATTGTGCTTCCTGGAATTATCTCCCAAATAATCAATCTGCACTTACATCCTTTTTATCTCAGGGTCTACTTTTGGAGATACCAAA\t*\tSA:Z:chr20,54849438,+,54M542H,60,0,54;chr15,68774173,+,92H63M441H,60,3,48;\tMD:Z:400\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:348\tXS:i:0", true)), CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict);; - final CpxVariantInducingAssemblyContig tig28220_5 = new CpxVariantInducingAssemblyContig(new AssemblyContigWithFineTunedAlignments(SVTestUtils.fromPrimarySAMRecordString("asm028220:tig00005\t16\tchr20\t54849438\t60\t54M206S\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCC\t*\tSA:Z:chr20,54849615,-,192H49M2I17M,60,2,52;chr15,68774173,-,92H63M105H,60,3,48;\tMD:Z:54\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:54\tXS:i:0", true)), CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict); + final CpxVariantInducingAssemblyContig tig13846_3 = new CpxVariantInducingAssemblyContig(new 
AssemblyContigWithFineTunedAlignments(SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm013846:tig00003\t0\tchr20\t54849615\t60\t192S49M2I55M1I50M1I246M\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCCTGAGTAAAAGCTTCCTAAGGCCTCACCAGAAGCCGAGCAGATGCTGTTGCCATGCTTGTAGTCTGCAGAACCATAACCCAAATAAACCCAAGTTTTTATAAATTACCCAGCTTCAGGTATTCCTTGATAGCAACGCAAAATGGATTAACACCTAACCACAGGTGCCCACAGCTGGAACTTGCTCCTTGCCTTATGCTTTGTTGACATTTTTCCCTTCCCTGATTTGCTTTCTTCACTTTCCTCACTCTCTCATTGTGCTTCCTGGAATTATCTCCCAAATAATCAATCTGCACTTACATCCTTTTTATCTCAGGGTCTACTTTTGGAGATACCAAA\t*\tSA:Z:chr20,54849438,+,54M542H,60,0,54;chr15,68774173,+,92H63M441H,60,3,48;\tMD:Z:400\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:348\tXS:i:0", true)), CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict);; + final CpxVariantInducingAssemblyContig tig28220_5 = new CpxVariantInducingAssemblyContig(new AssemblyContigWithFineTunedAlignments(SVDiscoveryTestUtilsAndCommonDataProvider.fromPrimarySAMRecordString("asm028220:tig00005\t16\tchr20\t54849438\t60\t54M206S\t*\t0\t0\tATATTTCTCAGAGGGTCTCTGGGGAGTTGATAGGCTTTGGATGTTTGCCTCCTCCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAGTGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGGCACCTCCTCTCTCTCCTTTCTTCCTCTCTCACCATGTGACACTCCTATCCCCCTTTGCCTTCTTGCC\t*\tSA:Z:chr20,54849615,-,192H49M2I17M,60,2,52;chr15,68774173,-,92H63M105H,60,3,48;\tMD:Z:54\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:54\tXS:i:0", true)), CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict); final CpxVariantCanonicalRepresentation cpxVariantCanonicalRepresentation = new CpxVariantCanonicalRepresentation(tig13846_3); final Tuple2> tuple2 = new Tuple2<>(cpxVariantCanonicalRepresentation, Arrays.asList(tig13846_3, tig28220_5)); - final Broadcast broadcast = 
SparkContextFactory.getTestSparkContext().broadcast(SimpleSVDiscoveryTestDataProvider.b38_reference); + final Broadcast broadcast = SparkContextFactory.getTestSparkContext().broadcast(SVDiscoveryTestUtilsAndCommonDataProvider.b38_reference_chr20_chr21); final byte[] refBases = broadcast.getValue().getReferenceBases(new SimpleInterval("chr20", 54849491, 54849615)).getBases(); final VariantContextBuilder baseVariantContextBuilder = cpxVariantCanonicalRepresentation.toVariantContext(refBases); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java index 4da45db11b0..f5bc83a6c53 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java @@ -9,7 +9,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; import org.broadinstitute.hellbender.tools.spark.sv.discovery.DiscoverVariantsFromContigAlignmentsSAMSpark; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvType; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.*; @@ -91,8 +91,8 @@ private static NovelAdjacencyAndAltHaplotype getBreakpoints(final String contigN final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 20101, 20200), 101, 200, TextCigarCodec.decode("100M"), false, 60, 0, 
100, ContigAlignmentsModifier.AlnModType.NONE); final ArrayList insertionMappings = new ArrayList<>(); insertionMappings.add(insertionMapping); - final SimpleChimera breakpoint = new SimpleChimera(region1, region2, insertionMappings, contigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict); - return new NovelAdjacencyAndAltHaplotype(breakpoint, SVTestUtils.makeDummySequence(200, (byte)'A'), b37_seqDict); + final SimpleChimera breakpoint = new SimpleChimera(region1, region2, insertionMappings, contigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); + return new NovelAdjacencyAndAltHaplotype(breakpoint, SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(200, (byte)'A'), SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); } // ----------------------------------------------------------------------------------------------- @@ -168,7 +168,7 @@ public void testGetAssembledBreakpointsFromAlignmentIntervalsWithOverlappingAlig final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("20", 23103633, 23104602), 556, 1525, TextCigarCodec.decode("555S970M"), true, 60, 3, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignedContig alignedContig = new AlignedContig("asm00001:tig0001", contigSequence, Arrays.asList(region1, region2, region3)); - final List assembledBreakpointsFromAlignmentIntervals = DiscoverVariantsFromContigAlignmentsSAMSpark.parseOneContig(alignedContig, b37_seqDict, true, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, true); + final List assembledBreakpointsFromAlignmentIntervals = DiscoverVariantsFromContigAlignmentsSAMSpark.parseOneContig(alignedContig, 
SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21, true, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, true); Assert.assertEquals(assembledBreakpointsFromAlignmentIntervals.size(), 1); final SimpleChimera simpleChimera = assembledBreakpointsFromAlignmentIntervals.get(0); Assert.assertEquals(simpleChimera.sourceContigName, "asm00001:tig0001"); @@ -177,7 +177,7 @@ public void testGetAssembledBreakpointsFromAlignmentIntervalsWithOverlappingAlig Assert.assertEquals(simpleChimera.insertionMappings.size(), 1); final String expectedInsertionMappingsString = String.join(AlignmentInterval.PACKED_STRING_REP_SEPARATOR, "516", "557", "20:23103196-23103237", "-", "515S42M968S", "60", "2", "100", "O"); Assert.assertEquals(simpleChimera.insertionMappings.get(0), expectedInsertionMappingsString); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(simpleChimera, contigSequence, b37_seqDict); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(simpleChimera, contigSequence, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); Assert.assertTrue(breakpoints.getComplication().getHomologyForwardStrandRep().isEmpty()); Assert.assertEquals(breakpoints.getComplication().getInsertedSequenceForwardStrandRep().getBytes(), Arrays.copyOfRange(contigSequence, 519, 555)); Assert.assertEquals(breakpoints.getAltHaplotypeSequence(), Arrays.copyOfRange(contigSequence, 519, 555)); @@ -188,9 +188,9 @@ public void testGetAssembledBreakpointsFromAlignmentIntervalsWithOverlappingAlig public void testGetBreakpoints_5to3Inversion_simple() { final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 101, 200), 1, 100, TextCigarCodec.decode("100M100S"), true, 60, 0, 
100, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 501, 600), 101, 200, TextCigarCodec.decode("100S100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final SimpleChimera simpleChimera = new SimpleChimera(region1, region2, Collections.emptyList(), "1", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict); + final SimpleChimera simpleChimera = new SimpleChimera(region1, region2, Collections.emptyList(), "1", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); final Tuple2 breakpoints = - BreakpointsInference.getInferenceClass(simpleChimera, null, b37_seqDict) + BreakpointsInference.getInferenceClass(simpleChimera, null, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21) .getLeftJustifiedBreakpoints(); Assert.assertEquals(breakpoints._1(), new SimpleInterval("20", 200, 200)); Assert.assertEquals(breakpoints._2(), new SimpleInterval("20", 600, 600)); @@ -202,10 +202,10 @@ public void testGetBreakpoints_5to3Inversion_withSimpleHomology() { final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 101, 156), 1, 56, TextCigarCodec.decode("56M50S"), true, 60, 0, 56, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 501, 556), 51, 106, TextCigarCodec.decode("56M50S"), false, 60, 0, 56, ContigAlignmentsModifier.AlnModType.NONE); - final SimpleChimera simpleChimera = new SimpleChimera(region1, region2, Collections.emptyList(), "1", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b37_seqDict); + final SimpleChimera simpleChimera = new SimpleChimera(region1, region2, Collections.emptyList(), "1", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); final Tuple2 breakpoints = - BreakpointsInference.getInferenceClass(simpleChimera, contigSeq, b37_seqDict) + 
BreakpointsInference.getInferenceClass(simpleChimera, contigSeq, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21) .getLeftJustifiedBreakpoints(); Assert.assertEquals(breakpoints._1(), new SimpleInterval("20", 150, 150)); Assert.assertEquals(breakpoints._2(), new SimpleInterval("20", 556, 556)); @@ -236,7 +236,7 @@ public void testGetBreakpoints_simpleDeletion() { @Test(groups = "sv") public void testGetBreakpoints_simpleInsertion() { - final byte[] insertedSeq = SVTestUtils.makeDummySequence(50, (byte)'C'); + final byte[] insertedSeq = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(50, (byte)'C'); final NovelAdjacencyAndAltHaplotype breakpoints = forSimpleInsertion_plus.biPathBubble; final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = forSimpleInsertion_minus.biPathBubble; @@ -253,7 +253,7 @@ public void testGetBreakpoints_simpleInsertion() { @Test(groups = "sv") public void testGetBreakpoints_longRangeSubstitution() { - byte[] substitution = SVTestUtils.makeDummySequence(10, (byte)'C'); + byte[] substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(10, (byte)'C'); NovelAdjacencyAndAltHaplotype breakpoints = forLongRangeSubstitution_fudgedDel_minus.biPathBubble; NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = forLongRangeSubstitution_fudgedDel_minus.biPathBubble; @@ -264,7 +264,7 @@ public void testGetBreakpoints_longRangeSubstitution() { substitution); Assert.assertEquals(breakpointsDetectedFromReverseStrand, breakpoints); - substitution = SVTestUtils.makeDummySequence(60, (byte)'C'); + substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(60, (byte)'C'); breakpoints = forLongRangeSubstitution_fatIns_plus.biPathBubble; breakpointsDetectedFromReverseStrand = forLongRangeSubstitution_fatIns_minus.biPathBubble; seeIfItWorksForNonSimpleTranslocations(breakpoints, StrandSwitch.NO_SWITCH, @@ -274,7 +274,7 @@ public void 
testGetBreakpoints_longRangeSubstitution() { substitution); Assert.assertEquals(breakpointsDetectedFromReverseStrand, breakpoints); - substitution = SVTestUtils.makeDummySequence(55, (byte)'C'); + substitution = SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(55, (byte)'C'); breakpoints = forLongRangeSubstitution_DelAndIns_plus.biPathBubble; breakpointsDetectedFromReverseStrand = forLongRangeSubstitution_DelAndIns_minus.biPathBubble; seeIfItWorksForNonSimpleTranslocations(breakpoints, StrandSwitch.NO_SWITCH, @@ -313,9 +313,9 @@ public void testGetBreakpoints_tandemDuplication_contraction_simple() { seeIfItWorksForNonSimpleTranslocations(breakpoints, StrandSwitch.NO_SWITCH, new SimpleInterval("21", 100040, 100040), new SimpleInterval("21", 100050, 100050), new SimpleInterval("21", 100041, 100050), - new String(SVTestUtils.makeDummySequence(10, (byte)'C')), "", + new String(SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(10, (byte)'C')), "", 2, 1, Collections.emptyList(), - SVTestUtils.makeDummySequence(10, (byte)'C')); + SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(10, (byte)'C')); Assert.assertEquals(breakpointsDetectedFromReverseStrand, breakpoints); } @@ -332,7 +332,7 @@ public void testGetBreakpoints_tandemDuplication_expansion_simple() { new SimpleInterval("21", 100041, 100050), "", "", 1, 2, Arrays.asList("10M", "10M"), - SVTestUtils.makeDummySequence(20, (byte)'C')); + SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(20, (byte)'C')); Assert.assertEquals(breakpointsDetectedFromReverseStrand, breakpoints); } @@ -497,10 +497,10 @@ public void testRefOrderSwitch() { new SimpleInterval("chr21", 39192594, 39192692), 252 ,350, TextCigarCodec.decode("251S99M26S"), true, 32, 1, 94, ContigAlignmentsModifier.AlnModType.NONE); - SimpleChimera simpleChimera = new SimpleChimera(region1, region2, Collections.emptyList(), "testContig", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, b38_seqDict); + SimpleChimera simpleChimera = 
new SimpleChimera(region1, region2, Collections.emptyList(), "testContig", NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21); NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(simpleChimera, "TTCCTTAAAATGCAGGTGAATACAAGAATTAGGTTTCAGGTTTTATATATATATTCTGATATATATATATAATATAACCTGAGATATATATATAAATATATATATTAATATATATTAATATATATAAATATATATATATTAATATATATTTATATATAAATATATATATATTAATATATATAAATATATATAAATATATATATATTAATATATATTAATATATAAATATATATATATTAATATATATTAATATATATAAATATATATATTAATATATATAAATATATATATAAATATATATAAATATATAAATATATATATAAATATATATAAATATATATAAATATATATACACACATACATACACATATACATT".getBytes(), - b38_seqDict); + SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21); Assert.assertEquals(breakpoints.getLeftJustifiedLeftRefLoc(), new SimpleInterval("chr21", 39192594, 39192594)); Assert.assertEquals(breakpoints.getLeftJustifiedRightRefLoc(), new SimpleInterval("chr21", 39477346, 39477346)); Assert.assertEquals(breakpoints.getComplication().getHomologyForwardStrandRep(), "ATATATAAATATATATA"); @@ -514,7 +514,7 @@ public void testRefOrderSwitch() { public void testGetType(final NovelAdjacencyAndAltHaplotype breakpoints, final List>> expectedTypeStringAndAttributeKeys) { - final List variants = breakpoints.toSimpleOrBNDTypes(b37_reference, b37_seqDict); + final List variants = breakpoints.toSimpleOrBNDTypes(SVDiscoveryTestUtilsAndCommonDataProvider.b37_reference_20_21, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); Assert.assertEquals(variants.size(), expectedTypeStringAndAttributeKeys.size()); for (int i = 0; i < variants.size(); ++i) { final SvType variant = variants.get(i); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleChimeraUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleChimeraUnitTest.java index 6c106286a12..84f3ce03b7b 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleChimeraUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleChimeraUnitTest.java @@ -8,9 +8,9 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.TextCigarCodec; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.ContigAlignmentsModifier; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -37,100 +37,100 @@ static List> // simple inversion TestDataForSimpleSVs testData = forSimpleInversionFromLongCtg1WithStrangeLeftBreakpoint; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = forSimpleInversionWithHom_leftPlus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = forSimpleInversionWithHom_leftMinus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, 
SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = forSimpleInversionWithHom_rightPlus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = forSimpleInversionWithHom_rightMinus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // simple deletion testData = SimpleSVDiscoveryTestDataProvider.forSimpleDeletion_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forSimpleDeletion_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // simple insertion testData = SimpleSVDiscoveryTestDataProvider.forSimpleInsertion_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forSimpleInsertion_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, 
testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // long range substitution testData = SimpleSVDiscoveryTestDataProvider.forLongRangeSubstitution_fudgedDel_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forLongRangeSubstitution_fudgedDel_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // simple deletion with homology testData = SimpleSVDiscoveryTestDataProvider.forDeletionWithHomology_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forDeletionWithHomology_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // tandem duplication simple contraction testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupContraction_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = 
SimpleSVDiscoveryTestDataProvider.forSimpleTanDupContraction_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // tandem duplication simple expansion testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_ins_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_ins_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // tandem duplication simple expansion with novel insertion testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_dup_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_dup_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // first test (the original observed event, but assigned to a different chromosome): expansion from 1 unit to 2 units with pseudo-homology testData = 
SimpleSVDiscoveryTestDataProvider.forComplexTanDup_1to2_pseudoHom_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_1to2_pseudoHom_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // second test: contraction from 2 units to 1 unit with pseudo-homology testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_2to1_pseudoHom_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_2to1_pseudoHom_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // third test: contraction from 3 units to 2 units without pseudo-homology testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_3to2_noPseudoHom_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_3to2_noPseudoHom_minus; - result.add(new 
Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); // fourth test: expansion from 2 units to 3 units without pseudo-homology testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_2to3_noPseudoHom_plus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); testData = SimpleSVDiscoveryTestDataProvider.forComplexTanDup_2to3_noPseudoHom_minus; - result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); + result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21)); ////////// ABOVE ARE FOR SIMPLE VARIANTS: INS/DEL, DUP EXPANSION, DUP CONTRACTION, INVERSION, BELOW ARE FOR TRANSLOCATION SUSPECTS AND INV DUP @@ -156,36 +156,36 @@ static List> AlignmentInterval intervalOne = new AlignmentInterval(one); AlignmentInterval intervalTwo = new AlignmentInterval(two); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); intervalOne = new AlignmentInterval(new SimpleInterval("chr20", 48513458, 48513545), 1, 88, TextCigarCodec.decode("88M227H"), true, 39, 1, 83, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 48513297, 48513578), 84, 365, TextCigarCodec.decode("83S282M"), false, 60, 0, 282, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, 
SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); // same-chr translocation suspect, forward and reverse representation intervalOne = new AlignmentInterval(new SimpleInterval("chr20", 61015129, 61015272), 1, 144, TextCigarCodec.decode("144M148H"), true, 60, 1, 139, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 60992732, 60992880), 144, 292, TextCigarCodec.decode("143S149M"), true, 60, 0, 149, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); intervalOne = new AlignmentInterval(new SimpleInterval("chr20", 28861368, 28861775), 1, 409, TextCigarCodec.decode("387M1I21M623H"), false, 60, 22, 286, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 28896473, 28897229), 276, 1032, TextCigarCodec.decode("275S757M"), false, 60, 1, 752, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); // diff-chr translocation suspect without SS intervalOne = new AlignmentInterval(new SimpleInterval("chr21", 24923683, 24923715), 1, 33, TextCigarCodec.decode("33M130H"), true, 60, 0, 33, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 11590055, 11590197), 21, 163, TextCigarCodec.decode("20S143M"), true, 60, 3, 128, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new 
Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); // diff-chr translocation suspect with SS intervalOne = new AlignmentInterval(new SimpleInterval("chr21", 5374092, 5374747), 1, 656, TextCigarCodec.decode("656M322S"), true, 60, 14, 586, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 28764673, 28765145), 506, 978, TextCigarCodec.decode("473M505H"), false, 60, 16, 393, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); // same-chr reference order switch, but overlaps (hence incomplete picture) intervalOne = new AlignmentInterval(new SimpleInterval("20", 283, 651), 383, 751, TextCigarCodec.decode("382H369M274H"), true, 60, 23, 254, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("20", 1, 413), 613, 1025, TextCigarCodec.decode("612H413M"), true, 60, 0, 413, ContigAlignmentsModifier.AlnModType.NONE); - result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); + result.add(new Tuple3<>(intervalOne, intervalTwo, SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21)); return result; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndSimpleChimeraEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndSimpleChimeraEvidenceUnitTest.java index d13a0762b4d..3a3feb423d0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndSimpleChimeraEvidenceUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SimpleNovelAdjacencyAndSimpleChimeraEvidenceUnitTest.java @@ -4,6 +4,7 @@ import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVDiscoveryTestDataProvider; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -33,10 +34,10 @@ private Object[][] forKryoSerializationAndHashCode() { final NovelAdjacencyAndAltHaplotype biPathBubble = pair._1.biPathBubble; final SimpleChimera forwardRep = new SimpleChimera(pair._1.firstAlignment, pair._1.secondAlignment, Collections.emptyList(), pair._1.evidenceAssemblyContigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, - SimpleSVDiscoveryTestDataProvider.b37_seqDict); + SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); final SimpleChimera reverseRep = new SimpleChimera(pair._2.firstAlignment, pair._2.secondAlignment, Collections.emptyList(), pair._2.evidenceAssemblyContigName, NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME, - SimpleSVDiscoveryTestDataProvider.b37_seqDict); + SVDiscoveryTestUtilsAndCommonDataProvider.b37_seqDict_20_21); final List evidence = Arrays.asList(forwardRep, reverseRep); data.add(new Object[]{new SimpleNovelAdjacencyAndChimericAlignmentEvidence(biPathBubble, evidence)}); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/StructuralVariationDiscoveryPipelineSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/StructuralVariationDiscoveryPipelineSparkIntegrationTest.java index 5be2a4e8dc0..3094a814cd0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/StructuralVariationDiscoveryPipelineSparkIntegrationTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/StructuralVariationDiscoveryPipelineSparkIntegrationTest.java @@ -1,7 +1,6 @@ package org.broadinstitute.hellbender.tools.spark.sv.integration; import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.CloserUtil; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; import org.apache.hadoop.fs.Path; @@ -176,11 +175,12 @@ static void svDiscoveryVCFEquivalenceTest(final String generatedVCFPath, final S final String experimentalOutputPathForNonComplex, final List attributesToIgnore, final boolean onHDFS) throws Exception { - final VCFFileReader fileReader = new VCFFileReader(new File(expectedVCFPath), false); - final CloseableIterator iterator = fileReader.iterator(); - final List expectedVcs = Utils.stream(iterator).collect(Collectors.toList()); - CloserUtil.close(iterator); - CloserUtil.close(fileReader); + List expectedVcs; + try (final VCFFileReader fileReader = new VCFFileReader(new File(expectedVCFPath), false) ) { + try (final CloseableIterator iterator = fileReader.iterator()) { + expectedVcs = Utils.stream(iterator).collect(Collectors.toList()); + } + } List actualVcs = extractActualVCs(generatedVCFPath, onHDFS); @@ -200,23 +200,21 @@ static void svDiscoveryVCFEquivalenceTest(final String generatedVCFPath, final S } } - private static List extractActualVCs(final String generatedVCFPath, final boolean onHDFS) + static List extractActualVCs(final String generatedVCFPath, final boolean onHDFS) throws IOException { - final VCFFileReader fileReader; + final File appropriateVCF; if (onHDFS) { - final File tempLocalVCF = GATKBaseTest.createTempFile("variants", "vcf"); - tempLocalVCF.deleteOnExit(); - BucketUtils.copyFile(generatedVCFPath, tempLocalVCF.getAbsolutePath()); - fileReader = new VCFFileReader(tempLocalVCF, false); + appropriateVCF = GATKBaseTest.createTempFile("variants", "vcf"); + appropriateVCF.deleteOnExit(); + 
BucketUtils.copyFile(generatedVCFPath, appropriateVCF.getAbsolutePath()); } else { - fileReader = new VCFFileReader(new File(generatedVCFPath), false); + appropriateVCF = new File(generatedVCFPath); + } + try (final VCFFileReader fileReader = new VCFFileReader(appropriateVCF, false)) { + try (final CloseableIterator iterator = fileReader.iterator()) { + return Utils.stream(iterator).collect(Collectors.toList()); + } } - final CloseableIterator iterator = fileReader.iterator(); - final List actualVcs = Utils.stream(iterator).collect(Collectors.toList()); - CloserUtil.close(iterator); - CloserUtil.close(fileReader); - - return actualVcs; } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriterUnitTest.java index 0a49390e94a..9e2a2ed51e2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriterUnitTest.java @@ -10,7 +10,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; import org.broadinstitute.hellbender.engine.datasources.ReferenceWindowFunctions; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVTestUtils; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -27,8 +27,8 @@ public class SVVCFWriterUnitTest extends GATKBaseTest { @Test(groups = "sv") public void testSortVariantsByCoordinate(){ - final String insOne = "AAA";new String(SVTestUtils.makeDummySequence(100, (byte)'A')); - final String insTwo = "AAC";new String(SVTestUtils.makeDummySequence(100, (byte)'C')); + final String insOne = "AAA";new 
String(SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'A')); + final String insTwo = "AAC";new String(SVDiscoveryTestUtilsAndCommonDataProvider.makeDummySequence(100, (byte)'C')); final String contig = "21"; final int pos = 100001; From a9bd30d5e6c157b38fcc1e09e16d90b214b86e55 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Fri, 23 Mar 2018 11:06:42 -0400 Subject: [PATCH 2/4] (SV) feature commit: adds a new tool named CpxVariantReInterpreterSpark to extract barebone-annotated simple variants from a GATK-SV discovery pipeline produced VCF containing complex variants --- .../CpxVariantReInterpreterSpark.java | 104 ++ ...entedCpxVariantSimpleVariantExtractor.java | 993 ++++++++++++++++++ 2 files changed, 1097 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantReInterpreterSpark.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantReInterpreterSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantReInterpreterSpark.java new file mode 100644 index 00000000000..047a9d7e0f4 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantReInterpreterSpark.java @@ -0,0 +1,104 @@ +package org.broadinstitute.hellbender.tools.spark.sv.discovery.inference; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.http.annotation.Experimental; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.broadinstitute.barclay.argparser.Argument; +import
org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; +import org.broadinstitute.hellbender.engine.filters.ReadFilter; +import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary; +import org.broadinstitute.hellbender.engine.spark.GATKSparkTool; +import org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource; +import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData; +import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.util.Collections; +import java.util.List; + +/** + * (Internal) Tries to extract simple variants from a provided GATK-SV CPX.vcf + */ +@DocumentedFeature +@BetaFeature +@Experimental +@CommandLineProgramProperties( + oneLineSummary = "(Internal) Tries to extract simple variants from a provided GATK-SV CPX.vcf", + summary = + "This tool is used in development and should not be of interest to most researchers." + + " It is a prototype of complex structural variant re-interpretation." 
+ + " In particular, it tries to extract basic SVTYPE's from a user-provided GATK-SV CPX.vcf," + + " and outputs two VCF files containing bare bone information on the simple variants.", + programGroup = StructuralVariantDiscoveryProgramGroup.class) +public class CpxVariantReInterpreterSpark extends GATKSparkTool { + private static final long serialVersionUID = 1L; + private final Logger localLogger = LogManager.getLogger(CpxVariantReInterpreterSpark.class); + + @Override + public boolean requiresReference() { + return true; + } + + @Override + public boolean requiresReads() { + return true; + } + + @Override + public List getDefaultReadFilters() { + return Collections.singletonList(ReadFilterLibrary.MAPPED); + } + + @ArgumentCollection + private StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection + discoverStageArgs + = new StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection(); + + @Argument(doc = "file containing non-canonical chromosome names (e.g chrUn_KI270588v1) in the reference, human reference (hg19 or hg38) assumed when omitted", + shortName = "alt-tigs", + fullName = "non-canonical-contig-names-file", optional = true) + public String nonCanonicalChromosomeNamesFile; + + @Argument(doc = "file containing complex variants as output by GATK-SV", + fullName = "cpx-vcf") + private String complexVCF; + + @Argument(doc = "prefix for two files containing derived simple variants for complex variants having one/multiple entry in SEGMENT annotation", + fullName = "prefix-out-vcf") + private String derivedSimpleVCFPrefix; + + @Override + protected void runTool(final JavaSparkContext ctx) { + + // TODO: 5/9/18 getback sample name in output files + final SAMFileHeader headerForReads = getHeaderForReads(); + final SvDiscoveryInputMetaData svDiscoveryInputMetaData = + new SvDiscoveryInputMetaData(ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile, + 
derivedSimpleVCFPrefix, + null, null, null, null, + headerForReads, getReference(), localLogger); + + final JavaRDD complexVariants = new VariantsSparkSource(ctx) + .getParallelVariantContexts(complexVCF, getIntervals()); + final JavaRDD assemblyRawAlignments = getReads(); + + final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants extract = + SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariants, svDiscoveryInputMetaData, assemblyRawAlignments); + + final String derivedOneSegmentSimpleVCF = derivedSimpleVCFPrefix + "_1_seg.vcf"; + final String derivedMultiSegmentSimpleVCF = derivedSimpleVCFPrefix + "_multi_seg.vcf"; + final VCFHeader vcfHeader = VariantsSparkSource.getHeader(complexVCF); + SVVCFWriter.writeVCF(extract.getReInterpretZeroOrOneSegmentCalls(), derivedOneSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), logger); + SVVCFWriter.writeVCF(extract.getReInterpretMultiSegmentsCalls(), derivedMultiSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), logger); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java new file mode 100644 index 00000000000..2ff93bacbf2 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractor.java @@ -0,0 +1,993 @@ +package org.broadinstitute.hellbender.tools.spark.sv.discovery.inference; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFConstants; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.broadcast.Broadcast; +import 
org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.DiscoverVariantsFromContigAlignmentsSAMSpark; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoverFromLocalAssemblyContigAlignmentsSpark; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; +import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.read.GATKRead; +import scala.Tuple2; +import scala.Tuple3; + +import java.io.IOException; +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.STRUCTURAL_VARIANT_SIZE_LOWER_BOUND; +import static org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants.*; + +/** + * For extracting simple variants from input GATK-SV complex variants. + * + * Some explanation on several concepts: + * + *

+ * Anchor ref base: + * the anchor base is defined per the VCF spec (see 1.4.1#REF version 4.2), that is, for DEL and INS variants + * it is the reference base at the position pointed to by POS, basically: + * for DEL, the reference bases immediately following POS are deleted (up to and including the END base), + * for INS, the sequence annotated in INSSEQ is inserted immediately after POS. + *

+ * + *

+ * "Fat" insertion: + * they exist because sometimes we have micro deletions surrounding the insertion breakpoint, + * so here the strategy is to report them as "fat", i.e. the anchor base and deleted bases are reported in REF; + * they are fat in the sense that, compared to simple insertions where only a single anchor ref base is necessary, their REF also carries the deleted bases + *

+ * + *

+ * It is also assumed that POS and END of the input complex {@link VariantContext} are the boundaries + * of the bases where the REF and ALT alleles share similarity, in other words, + * immediately after POS and before END is where the REF and ALT alleles differ, and the two paths merge at POS/END. + *

+ */ +public abstract class SegmentedCpxVariantSimpleVariantExtractor implements Serializable { + private static final long serialVersionUID = 1L; + + // TODO: 5/2/18 for use in output VCF to link to original CPX variant, to be moved to GATKSVVCFConstants + static String EVENT_KEY = "CPX_EVENT"; + private static int EVENT_SIZE_THRESHOLD = STRUCTURAL_VARIANT_SIZE_LOWER_BOUND - 1; + static final String CPX_DERIVED_POSTFIX_STRING = "CPX_DERIVED"; + private static String makeID(final String typeName, final String chr, final int start, final int stop) { + return typeName + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + chr + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + start + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + stop + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + CPX_DERIVED_POSTFIX_STRING; + } + + // TODO: 5/2/18 move to a utility class + /** + * this exist because for whatever reason, + * VC.getAttributeAsStringList() sometimes returns a giant single string, while using + * VC.getAttributeAsString() gives back an array..... 
+ */ + static List getAttributeAsStringList(final VariantContext vc, final String attributeKey) { + if (vc.getAttribute(attributeKey) == null) return Collections.emptyList(); + return vc.getAttributeAsStringList(attributeKey, "").stream() + .flatMap(s -> { + if ( s.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR) ) { + final String[] split = s.split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR); + return Arrays.stream(split); + } else { + return Stream.of(s); + } + }) + .collect(Collectors.toList()); + } + static SimpleInterval makeOneBpInterval(final String chr, final int pos) { + return new SimpleInterval(chr, pos, pos); + } + + public static final class ExtractedSimpleVariants { + private final List reInterpretZeroOrOneSegmentCalls; + private final List reInterpretMultiSegmentsCalls; + + private ExtractedSimpleVariants(final List reInterpretZeroOrOneSegmentCalls, + final List reInterpretMultiSegmentsCalls) { + this.reInterpretZeroOrOneSegmentCalls = reInterpretZeroOrOneSegmentCalls; + this.reInterpretMultiSegmentsCalls = reInterpretMultiSegmentsCalls; + } + + public List getReInterpretZeroOrOneSegmentCalls() { + return reInterpretZeroOrOneSegmentCalls; + } + + public List getReInterpretMultiSegmentsCalls() { + return reInterpretMultiSegmentsCalls; + } + } + + // main interface to user code + public static ExtractedSimpleVariants extract(final JavaRDD complexVariants, + final SvDiscoveryInputMetaData svDiscoveryInputMetaData, + final JavaRDD assemblyRawAlignments) { + + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); + + // still does an in-efficient 2-pass on the input RDD: 1 pass for zero- and one-segment calls, the other for multi-segment calls + // that was due to restriction from how multi-segment calls are to be re-interpreted + final ZeroAndOneSegmentCpxVariantExtractor zeroAndOneSegmentCpxVariantExtractor = new ZeroAndOneSegmentCpxVariantExtractor(); + final JavaRDD zeroOrOneSegmentComplexVariants = 
complexVariants + .filter(vc -> getAttributeAsStringList(vc, CPX_SV_REF_SEGMENTS).size() < 2) + .cache(); + final List reInterpretedZeroAndOneSegmentCalls = + zeroOrOneSegmentComplexVariants + .flatMap(vc -> zeroAndOneSegmentCpxVariantExtractor.extract(vc, referenceBroadcast.getValue()).iterator()) + .collect(); + zeroOrOneSegmentComplexVariants.unpersist(false); + + final JavaRDD multiSegmentCalls = + complexVariants.filter(vc -> getAttributeAsStringList(vc, CPX_SV_REF_SEGMENTS).size() > 1) + .cache(); + + final MultiSegmentsCpxVariantExtractor multiSegmentsCpxVariantExtractor = new MultiSegmentsCpxVariantExtractor(); + final List sourceWithLessAnnotations = multiSegmentCalls + .flatMap(vc -> multiSegmentsCpxVariantExtractor.extract(vc, referenceBroadcast.getValue()).iterator()).collect(); + + final List sourceWithMoreAnnotations = + reInterpretMultiSegmentComplexVarThroughAlignmentPairIteration(multiSegmentCalls, + svDiscoveryInputMetaData, assemblyRawAlignments); + + final List reInterpretMultiSegmentsCalls = removeDuplicates(sourceWithLessAnnotations, sourceWithMoreAnnotations); + multiSegmentCalls.unpersist(false); + + return new ExtractedSimpleVariants(reInterpretedZeroAndOneSegmentCalls, reInterpretMultiSegmentsCalls); + } + + //================================================================================================================== + + @VisibleForTesting + static final class RelevantAttributes implements Serializable { + private static final long serialVersionUID = 1L; + + private final String id; + private final List referenceSegments; + private final List altArrangements; + + @VisibleForTesting + RelevantAttributes(final VariantContext multiSegmentComplexVar) { + id = multiSegmentComplexVar.getID(); + referenceSegments = getAttributeAsStringList(multiSegmentComplexVar, CPX_SV_REF_SEGMENTS) + .stream().map(SimpleInterval::new).collect(Collectors.toList()); + altArrangements = getAttributeAsStringList(multiSegmentComplexVar, 
CPX_EVENT_ALT_ARRANGEMENTS); + } + } + + /** + * Send relevant contigs for re-interpretation via the pair-iteration way of scanning the alignments for interpretation. + * + * Re-interpret CPX vcf records whose + * {@link org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants#CPX_SV_REF_SEGMENTS} + * has more than one entries, aka "multi-segment" calls. + * + * Exist basically to extract insertions, because + * deletions and inversions are relatively easy to be extracted by + * {@link org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor.MultiSegmentsCpxVariantExtractor} + * + * @return the {@link SimpleSVType}-d variants extracted from the input that are consistent with the description in the complex variants + */ + private static List reInterpretMultiSegmentComplexVarThroughAlignmentPairIteration( + final JavaRDD multiSegmentCalls, + final SvDiscoveryInputMetaData svDiscoveryInputMetaData, + final JavaRDD assemblyRawAlignments) { + + final Map contigNameToCpxVariantAttributes = + multiSegmentCalls + .flatMapToPair(complex -> { + final RelevantAttributes relevantAttributes = new RelevantAttributes(complex); + return getAttributeAsStringList(complex, CONTIG_NAMES).stream() + .map(name -> new Tuple2<>(name, relevantAttributes)) + .iterator(); + }) + .collectAsMap(); + + // resend the relevant contigs through the pair-iteration-ed path + final Set relevantContigs = new HashSet<>( contigNameToCpxVariantAttributes.keySet() ); + final JavaRDD relevantAlignments = assemblyRawAlignments.filter(read -> relevantContigs.contains(read.getName())); + final JavaRDD analysisReadyContigs = + SvDiscoverFromLocalAssemblyContigAlignmentsSpark + .preprocess(svDiscoveryInputMetaData, relevantAlignments) + .getContigsWithSignatureClassifiedAsComplex() + .map(AssemblyContigWithFineTunedAlignments::getSourceContig); + + @SuppressWarnings("deprecation") + List pairIterationReInterpreted = 
DiscoverVariantsFromContigAlignmentsSAMSpark + .discoverVariantsFromChimeras(svDiscoveryInputMetaData, analysisReadyContigs); + + final Broadcast referenceBroadcast = svDiscoveryInputMetaData.getReferenceData().getReferenceBroadcast(); + return pairIterationReInterpreted.stream() + .map(vc -> { + final List consistentComplexVariantIDs = + getAttributeAsStringList(vc, CONTIG_NAMES).stream() + .map(contigNameToCpxVariantAttributes::get) + .filter(attributes -> isConsistentWithCPX(vc, attributes)) + .map(attributes -> attributes.id) + .collect(Collectors.toList()); + if ( consistentComplexVariantIDs.isEmpty()) { + return null; + } else { + return new VariantContextBuilder(vc) + .id(vc.getID() + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + CPX_DERIVED_POSTFIX_STRING) + .attribute(EVENT_KEY, + String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, consistentComplexVariantIDs)) + .make(); + } + }) + .filter(Objects::nonNull) + .map(SegmentedCpxVariantSimpleVariantExtractor::postProcessConvertShortDupToIns) + .flatMap(simple -> postProcessConvertReplacementToFatInsOrInsAndDel(simple, referenceBroadcast.getValue())) + .collect(Collectors.toList()); + } + + /** + * Convert short, i.e. duplicated range is < 50 bp, duplication call to insertion call. 
+ */ + @VisibleForTesting + static VariantContext postProcessConvertShortDupToIns(final VariantContext simple) { + final String type = simple.getAttributeAsString(SVTYPE, ""); + if ( type.equals(SimpleSVType.TYPES.DUP.name()) ) { + final SimpleInterval duplicatedRegion = new SimpleInterval(simple.getAttributeAsString(DUP_REPEAT_UNIT_REF_SPAN, "")); + if (duplicatedRegion.size() > EVENT_SIZE_THRESHOLD) { + return simple; + } else { + return new VariantContextBuilder(simple) + .alleles(Arrays.asList(simple.getReference(), altSymbAlleleIns)) + .rmAttribute(SVTYPE) + .attribute(SVTYPE, SimpleSVType.TYPES.INS.name()) + .make(); + } + } else + return simple; + } + + /** + * Pair-iteration way of extracting simple variants reports replacement calls as a single DEL with + * inserted sequence annotations. + * This utility breaks that into: + * when the inserted sequence is long enough, an extra insertion call + * when the deleted range is not long enough, replace with fat insertion. + */ + @VisibleForTesting + static Stream postProcessConvertReplacementToFatInsOrInsAndDel(final VariantContext simple, + final ReferenceMultiSource reference) { + final String type = simple.getAttributeAsString(SVTYPE, ""); + if ( type.equals(SimpleSVType.TYPES.DEL.name()) ) { + final int deletionLen = - simple.getAttributeAsInt(SVLEN, 0); + final int insLen = simple.getAttributeAsInt(INSERTED_SEQUENCE_LENGTH, 0); + if (insLen > EVENT_SIZE_THRESHOLD && deletionLen > EVENT_SIZE_THRESHOLD) { // case 1: insertion and deletion, linked + + final Map attributes = new HashMap<>( simple.getAttributes() ); + attributes.remove(INSERTED_SEQUENCE_MAPPINGS); + attributes.remove(SVLEN); + attributes.remove(SVTYPE); + + VariantContextBuilder newInsertion = makeInsertion(simple.getContig(), simple.getStart(), simple.getStart(), insLen, simple.getReference()); + attributes.forEach(newInsertion::attribute); + newInsertion.rmAttribute(HOMOLOGY).rmAttribute(HOMOLOGY_LENGTH); + 
newInsertion.rmAttribute(VCFConstants.END_KEY).attribute(VCFConstants.END_KEY, simple.getStart()); + + VariantContextBuilder newDeletion = makeDeletion(new SimpleInterval(simple.getContig(), simple.getStart(), simple.getEnd()), simple.getReference()); + attributes.forEach(newDeletion::attribute); + newDeletion.rmAttribute(INSERTED_SEQUENCE).rmAttribute(INSERTED_SEQUENCE_LENGTH).rmAttribute(SEQ_ALT_HAPLOTYPE); + + // cross linking + newInsertion.attribute(LINK, makeID(SimpleSVType.TYPES.DEL.name(), simple.getContig(), simple.getStart(), simple.getEnd())); + newDeletion.attribute(LINK, makeID(SimpleSVType.TYPES.INS.name(), simple.getContig(), simple.getStart(), simple.getStart())); + + return Stream.of(newDeletion.make(), newInsertion.make()); + } else if (insLen > EVENT_SIZE_THRESHOLD && deletionLen <= EVENT_SIZE_THRESHOLD) { // case 2: insertion with micro deletion + String fatInsertionID = simple.getID().replace("DEL", "INS"); + final Map attributes = new HashMap<>( simple.getAttributes() ); + attributes.remove(INSERTED_SEQUENCE_MAPPINGS); + attributes.remove(HOMOLOGY_LENGTH); + attributes.remove(HOMOLOGY); + attributes.remove(SVLEN); + attributes.remove(SVTYPE); + byte[] referenceBases = getReferenceBases(new SimpleInterval(simple.getContig(), simple.getStart(), simple.getEnd()), reference); + VariantContextBuilder fatInsertion = makeInsertion(simple.getContig(), simple.getStart(), simple.getEnd(), insLen, + Allele.create(referenceBases, true)); + attributes.forEach(fatInsertion::attribute); + fatInsertion.id(fatInsertionID); + return Stream.of(fatInsertion.make()); + } else if (insLen <= EVENT_SIZE_THRESHOLD && deletionLen > EVENT_SIZE_THRESHOLD) { // case 3:deletion with micro insertion + return Stream.of(simple); + } else { // case 4: neither is large enough, rare but possible + return Stream.empty(); + } + } else + return Stream.of(simple); + } + + // TODO: 3/26/18 here we check consistency only for DEL calls, and reject all INV calls (they will be extracted 
via MultiSegmentsCpxVariantExtractor), and INS consistency check is difficult + /** + * @param simple simple variant derived from pair-iteration logic that is to be checked + * @param attributes source CPX variant attributes + */ + @VisibleForTesting + static boolean isConsistentWithCPX(final VariantContext simple, + final RelevantAttributes attributes) { + + final String typeString = simple.getAttributeAsString(SVTYPE, ""); + + if ( typeString.equals(SimpleSVType.TYPES.DEL.name()) ) { + final List refSegments = attributes.referenceSegments; + final List altArrangement = attributes.altArrangements; + + final Tuple3, Set, List> missingAndPresentAndInvertedSegments = + getMissingAndPresentAndInvertedSegments(refSegments, altArrangement); + final Set missingSegments = missingAndPresentAndInvertedSegments._1(); + + return deletionConsistencyCheck(simple, missingSegments); + } else if ( typeString.equals(SimpleSVType.TYPES.INV.name()) ) { + return false; + } else + return true; + } + + @VisibleForTesting + static boolean deletionConsistencyCheck(final VariantContext simple, final Set missingSegments) { + if (missingSegments.isEmpty()) return false; + + final SimpleInterval deletedRange = new SimpleInterval(simple.getContig(), simple.getStart() + 1, simple.getEnd()); + // dummy number for chr to be used in constructing SVInterval, since 2 input AI's both map to the same chr by this point + final int dummyChr = -1; + final SVInterval intervalOne = new SVInterval(dummyChr, deletedRange.getStart() - 1, deletedRange.getEnd()); + + for (final SimpleInterval missing : missingSegments) { + if ( ! 
missing.overlaps(deletedRange) ) + return false; + final SVInterval intervalTwo = new SVInterval(dummyChr, missing.getStart() - 1, missing.getEnd()); + // allow 1-base fuzziness from either end + if ( Math.abs(missing.size() - deletedRange.size()) > 2 ) + return false; + if( 2 >= Math.abs( Math.min(missing.size(), deletedRange.size()) - intervalTwo.overlapLen(intervalOne) ) ){ + return true; + } + } + return false; + } + + /** + * Exist for equals() and hashCode() + */ + private static final class AnnotatedInterval { + + private final VariantContext sourceVC; // NOTE: omitted in equals() and hashCode() on purpose + + final SimpleInterval interval; + final String id; + final String type; + final int svlen; + final List alleles; + + private AnnotatedInterval(final VariantContext vc) { + sourceVC = vc; + interval = new SimpleInterval( vc.getContig(), vc.getStart(), vc.getEnd()); + id = vc.getID(); + type = vc.getAttributeAsString(SVTYPE, ""); + svlen = vc.getAttributeAsInt(SVLEN, 0); + alleles = vc.getAlleles(); + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final AnnotatedInterval interval1 = (AnnotatedInterval) o; + + if (svlen != interval1.svlen) return false; + if (!interval.equals(interval1.interval)) return false; + if (!id.equals(interval1.id)) return false; + if (!type.equals(interval1.type)) return false; + return alleles.equals(interval1.alleles); + } + + @Override + public int hashCode() { + int result = interval.hashCode(); + result = 31 * result + id.hashCode(); + result = 31 * result + type.hashCode(); + result = 31 * result + svlen; + result = 31 * result + alleles.hashCode(); + return result; + } + } + + /** + * For constructing a map from {@link AnnotatedInterval} to source complex variant IDs and + * their associated assembly contig names. 
+ */ + private static Map, TreeSet>> + getAnnotatedIntervalToSourceCpxIDsAndContigNames(final List extractedSimpleVariants) { + // TODO: 5/11/18 this is suboptimal: + // a round trip to AnnotatedInterval because some CPX variants themselves are duplicated, + // i.e. their alt seq, extracted from different assembly contigs, only differ slightly. + return extractedSimpleVariants.stream().map(AnnotatedInterval::new).collect(Collectors.toCollection(HashSet::new)) + .stream().map(ai -> ai.sourceVC) + .collect(Collectors.toMap(AnnotatedInterval::new, + simpleVC -> { + final TreeSet complexEvents = new TreeSet<>(getAttributeAsStringList(simpleVC, EVENT_KEY)); + final TreeSet sourceCtgNames = new TreeSet<>(getAttributeAsStringList(simpleVC, CONTIG_NAMES)); + return new Tuple2<>(complexEvents, sourceCtgNames); + }) + ); // hashMap is good enough for us + } + + /** + * Exist because the two ways to re-interpret simple variants via + * {@link MultiSegmentsCpxVariantExtractor} + * and via + * {@link #reInterpretMultiSegmentComplexVarThroughAlignmentPairIteration(JavaRDD, SvDiscoveryInputMetaData, JavaRDD)} + * could give essentially the same variants. 
+ */ + @VisibleForTesting + static List removeDuplicates(final List sourceWithLessAnnotations, + final List sourceWithMoreAnnotations) { + + final Map, TreeSet>> rangeToAnnotationsFromSourceWithLessAnnotations = + getAnnotatedIntervalToSourceCpxIDsAndContigNames(sourceWithLessAnnotations); + final Map, TreeSet>> rangeToAnnotationsFromSourceWithMoreAnnotations = + getAnnotatedIntervalToSourceCpxIDsAndContigNames(sourceWithMoreAnnotations); + + final List result = new ArrayList<>(sourceWithMoreAnnotations.size() + sourceWithLessAnnotations.size()); + for (final Map.Entry, TreeSet>> entry: rangeToAnnotationsFromSourceWithMoreAnnotations.entrySet()) { + final AnnotatedInterval interval = entry.getKey(); + final Tuple2, TreeSet> sourceAttributes = entry.getValue(); + final Tuple2, TreeSet> anotherSourceAttributes = rangeToAnnotationsFromSourceWithLessAnnotations.get(interval); + if (anotherSourceAttributes == null) { // variant unique to one source + result.add( interval.sourceVC ); + } else { // found duplicate, merge annotations + final TreeSet sourceCpxIDs = sourceAttributes._1; + final TreeSet sourceCtgNames = sourceAttributes._2; + sourceCpxIDs.addAll(anotherSourceAttributes._1); + sourceCtgNames.addAll(anotherSourceAttributes._2); + final VariantContextBuilder variant = new VariantContextBuilder(interval.sourceVC) + .rmAttribute(EVENT_KEY) + .attribute(EVENT_KEY, String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, sourceCpxIDs)) + .rmAttribute(CONTIG_NAMES) + .attribute(CONTIG_NAMES, String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, sourceCtgNames)); + result.add( variant.make()); + + rangeToAnnotationsFromSourceWithLessAnnotations.remove(interval); // remove from the other source + } + } + + // now anotherSource has only unique records + rangeToAnnotationsFromSourceWithLessAnnotations.keySet().forEach(interval -> result.add(interval.sourceVC)); + + return result; + } + + 
//================================================================================================================== + + abstract List extract(final VariantContext complexVC, final ReferenceMultiSource reference); + + @VisibleForTesting + static final class ZeroAndOneSegmentCpxVariantExtractor extends SegmentedCpxVariantSimpleVariantExtractor { + private static final long serialVersionUID = 1L; + + /** + * Depending on how the ref segment is present in alt arrangement (if at all), logic as follows (order is important): + *
    + *
  • + * if ref segment appear inverted and large enough + *
      + *
    • INV call is warranted
    • + *
    • INS call(s) before and after the INV, if inserted sequence long enough
    • + *
    + *
  • + * + *
  • + * otherwise if ref segment is present as-is, i.e. no deletion call can be made, + * make insertion calls when possible + *
  • + *
  • + * otherwise + *
      + *
    • if the segment is large enough, make a DEL call, and insertion calls when possible
    • + *
    • otherwise a single fat INS call
    • + *
    + *
  • + *
+ * + *

+ * Note that the above logic has a bias towards getting INV calls, because + * when the (large enough) reference segment appears both as-is and inverted, + * the above logic will emit at least an INV call, + * whereas the (inverted) duplication(s) could also be reported as a DUP call, but... + *

+ */ + @Override + List extract(final VariantContext complexVC, final ReferenceMultiSource reference) { + + final List segments = getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS); + if (segments.isEmpty()) return whenZeroSegments(complexVC, reference); + + final SimpleInterval refSegment = new SimpleInterval(segments.get(0)); + final List altArrangement = getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS); + final int altSeqLength = complexVC.getAttributeAsString(SEQ_ALT_HAPLOTYPE, "").length(); + + final List result = new ArrayList<>(); + + final int asIsAppearanceIdx = altArrangement.indexOf("1"); + final int invertedAppearanceIdx = altArrangement.indexOf("-1"); + if (invertedAppearanceIdx != -1 && refSegment.size() > EVENT_SIZE_THRESHOLD) { // inversion call + whenInversionIsWarranted(refSegment, invertedAppearanceIdx, altArrangement, reference, result); + } else if (asIsAppearanceIdx != -1) { // no inverted appearance or appear inverted but not large enough, and in the mean time appear as-is, so no deletion + whenNoDeletionIsAllowed(refSegment, asIsAppearanceIdx, altArrangement, altSeqLength, reference, result); + } else { // no as-is appearance && (inverted appearance might present not not large enough) + whenNoInvAndNoAsIsAppearance(refSegment, altSeqLength, reference, result); + } + + final String sourceID = complexVC.getID(); + final List evidenceContigs = getAttributeAsStringList(complexVC, CONTIG_NAMES); + return result.stream() + .map(vc -> vc.attribute(EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs).make()) + .collect(Collectors.toList()); + } + + private List whenZeroSegments(final VariantContext complexVC, final ReferenceMultiSource reference) { + final Allele anchorBaseRefAllele = getAnchorBaseRefAllele(complexVC.getContig(), complexVC.getStart(), reference); + final int altSeqLength = complexVC.getAttributeAsString(SEQ_ALT_HAPLOTYPE, "").length() - 2; + final VariantContext insertion = 
makeInsertion(complexVC.getContig(), complexVC.getStart(), complexVC.getStart(), altSeqLength, anchorBaseRefAllele) + .attribute(EVENT_KEY, complexVC.getID()) + .attribute(CONTIG_NAMES, complexVC.getAttribute(CONTIG_NAMES)) + .make(); + return Collections.singletonList(insertion); + } + + private static void whenInversionIsWarranted(final SimpleInterval refSegment, final int invertedAppearanceIdx, + final List altArrangement, final ReferenceMultiSource reference, + final List result) { + + final Allele anchorBaseRefAllele = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart(), reference); + result.add( makeInversion(refSegment, anchorBaseRefAllele) ); + + // further check if alt seq length is long enough to trigger an insertion as well, + // but guard against case smallIns1 + INV + smallIns2, in theory one could annotate the inversion + // with micro-insertions if that's the case, but we try to have minimal annotations here + final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart() - 1, reference); + final Allele anchorBaseRefAlleleRear = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getEnd(), reference); + extractFrontAndRearInsertions(refSegment, invertedAppearanceIdx, altArrangement, + anchorBaseRefAlleleFront, anchorBaseRefAlleleRear, result); + } + + private static void whenNoDeletionIsAllowed(final SimpleInterval refSegment, final int asIsAppearanceIdx, + final List altArrangement, final int altSeqLength, + final ReferenceMultiSource reference, final List result) { + final int segmentSize = refSegment.size(); + if (altSeqLength - segmentSize > EVENT_SIZE_THRESHOLD ) { // long enough net gain to trigger insertion calls + // distinguish between cases {"1", ....}, {....., "1"}, and {....., "1", ....} to know where to place the insertion + final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart() - 1, reference); + final Allele 
anchorBaseRefAlleleRear = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getEnd(), reference); + if ( altArrangement.get(altArrangement.size() - 1).equals("1") ) { // {....., "1"} -> front insertion + final VariantContextBuilder frontIns = + SegmentedCpxVariantSimpleVariantExtractor.makeInsertion(refSegment.getContig(), + refSegment.getStart() - 1, refSegment.getStart() - 1, + altSeqLength - segmentSize, anchorBaseRefAlleleFront); + result.add(frontIns); + } else if ( altArrangement.get(0).equals("1") ) { // {"1", ....} -> rear insertion, anchored on the ref base at the segment end (where the insertion is placed) + final VariantContextBuilder rearIns = + SegmentedCpxVariantSimpleVariantExtractor.makeInsertion(refSegment.getContig(), + refSegment.getEnd(), refSegment.getEnd(), + altSeqLength - segmentSize, anchorBaseRefAlleleRear); + result.add(rearIns); + } else { // {....., "1", ....} -> collect new insertion length before and after + extractFrontAndRearInsertions(refSegment, asIsAppearanceIdx, altArrangement, + anchorBaseRefAlleleFront, anchorBaseRefAlleleRear, result); + } + } + } + + private static void whenNoInvAndNoAsIsAppearance(final SimpleInterval refSegment, final int altSeqLength, + final ReferenceMultiSource reference, final List result) { + if ( refSegment.size() > EVENT_SIZE_THRESHOLD ) { // a deletion call must be present + + final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart(), reference); + + // need left shift because the segment boundaries are shared by REF and ALT + result.add( makeDeletion(new SimpleInterval(refSegment.getContig(), refSegment.getStart(), refSegment.getEnd() - 1), + anchorBaseRefAlleleFront) ); + + // if the replacing sequence is long enough to trigger an insertion as well + if (altSeqLength - 2 > EVENT_SIZE_THRESHOLD) { + result.add(makeInsertion(refSegment.getContig(), refSegment.getStart(), refSegment.getStart(), altSeqLength, anchorBaseRefAlleleFront)); + } + } else if ( altSeqLength - 2 > EVENT_SIZE_THRESHOLD ){ // ref segment not
long enough to merit an INV or DEL, so a fat INS, if size is enough + final Allele fatInsertionRefAllele = + Allele.create(getReferenceBases(new SimpleInterval(refSegment.getContig(), refSegment.getStart(), refSegment.getEnd() - 1), reference), true); + result.add( makeInsertion(refSegment.getContig(), refSegment.getStart(), refSegment.getEnd() - 1, + altSeqLength - refSegment.size(), fatInsertionRefAllele) ); + } + } + + private static void extractFrontAndRearInsertions(final SimpleInterval refSegment, final int segmentIdx, + final List altArrangement, + final Allele anchorBaseRefAlleleFront, + final Allele anchorBaseRefAlleleRear, + final List result) { + + final List segmentLen = Collections.singletonList(refSegment.size()); + + final SimpleInterval frontInsPos = makeOneBpInterval(refSegment.getContig(), refSegment.getStart() - 1); + final VariantContextBuilder frontIns = + getInsFromOneEnd(true, segmentIdx, frontInsPos, anchorBaseRefAlleleFront, segmentLen, altArrangement, true); + if (frontIns != null) + result.add(frontIns); + + final SimpleInterval rearInsPos = makeOneBpInterval(refSegment.getContig(), refSegment.getEnd()); + final VariantContextBuilder rearIns = + getInsFromOneEnd(false, segmentIdx, rearInsPos, anchorBaseRefAlleleRear, segmentLen, altArrangement, true); + if (rearIns != null) + result.add(rearIns); + } + } + + @VisibleForTesting + static final class MultiSegmentsCpxVariantExtractor extends SegmentedCpxVariantSimpleVariantExtractor { + private static final long serialVersionUID = 1L; + + @Override + List extract(final VariantContext complexVC, final ReferenceMultiSource reference) { + + final List refSegments = + getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS).stream() + .map(SimpleInterval::new) + .collect(Collectors.toList()); + + final List altArrangement = getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS); + + final Tuple3, Set, List> missingAndPresentAndInvertedSegments = 
getMissingAndPresentAndInvertedSegments(refSegments, altArrangement); + final Set missingSegments = missingAndPresentAndInvertedSegments._1(); + final Set presentSegments = missingAndPresentAndInvertedSegments._2(); + final List invertedSegments = missingAndPresentAndInvertedSegments._3(); + + final List result = new ArrayList<>(); + + // if affected ref sequence found as is (trusting the aligner), then only output front and/or rear insertions + final int idx = findAllSegments(altArrangement, refSegments.size()); + if ( idx >= 0 ) { + whenAllSegmentsAppearAsIs(complexVC, reference, refSegments, altArrangement, result, idx); + } else { + + // inversions + if (!invertedSegments.isEmpty()) { + extractInversions(reference, refSegments, presentSegments, invertedSegments, result); + } + + // deletions + if (!missingSegments.isEmpty()) { + extractDeletions(reference, missingSegments, result); + } + + // head and tail insertions only + extractFrontAndRearInsertions(complexVC, refSegments, altArrangement, result); + } + + final String sourceID = complexVC.getID(); + final List evidenceContigs = getAttributeAsStringList(complexVC, CONTIG_NAMES); + + return result.stream() + .map(vc -> vc.attribute(EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs).make()) + .collect(Collectors.toList()); + } + + /** + * Given {@code altArrangement} and count of segments, return the index in {@code altArrangement} + * pointing to "1" where all segments contiguously appear after that, i.e. the affected reference region + * appear as is (with other insertions, duplications, etc at either end) according to {@code altArrangement} + * + * Example: + * ......, 1, 2, 3, 4, ..... + * with 4 segments, + * and the index of the "1" is 2, + * then this function returns 2. + * but if the altArrangement is + * ......, 1, 2, 3, , ..... + * the function returns -1 because not all segments appear as-is in the description. 
+ */ + @VisibleForTesting + static int findAllSegments(final List altArrangement, final int segmentCount) { + int idx = -1; + int currentlyLookingForSegment = segmentCount; + final String segmentCountString = String.valueOf(segmentCount); + for (int i = altArrangement.size() - 1; i >= 0 ; --i) { // reversely because we want to follow left-justify convention + final String description = altArrangement.get(i); + if ( description.equals( String.valueOf(currentlyLookingForSegment) ) ) { + if (currentlyLookingForSegment == 1) return i; + --currentlyLookingForSegment; + } else { + currentlyLookingForSegment = description.equals( segmentCountString ) ? segmentCount - 1 : segmentCount; + idx = -1; + } + } + return idx; + } + + private static void whenAllSegmentsAppearAsIs(final VariantContext complexVC, final ReferenceMultiSource reference, + final List refSegments, final List altArrangement, + final List result, final int idx) { + final List refSegmentLengths = refSegments.stream().map(SimpleInterval::size).collect(Collectors.toList()); + + if ( idx != 0 ) { // e.g. 4 segments, and alt arrangement is ......, 1,2,3,4, there could be (that is, if long enough) front insertion + final SimpleInterval insertionPos = new SimpleInterval(complexVC.getContig(), + complexVC.getStart() - 1, complexVC.getStart() - 1); + final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(insertionPos.getContig(), insertionPos.getStart(), reference); + final VariantContextBuilder frontIns = getInsFromOneEnd(true, idx, insertionPos, + anchorBaseRefAlleleFront, refSegmentLengths, altArrangement, true); + if (frontIns != null) result.add(frontIns); + } + if ( idx + refSegments.size() - 1 < altArrangement.size() - 1 ) { // e.g. 
there's more after 1,2,3,4,..., there could be (that is, if long enough) rear insertion + final SimpleInterval insertionPos = new SimpleInterval(complexVC.getContig(), complexVC.getEnd(), complexVC.getEnd()); + final byte[] refBases = complexVC.getReference().getBases(); + final Allele anchorBaseRefAlleleRear = Allele.create(refBases[refBases.length - 1], true); + final VariantContextBuilder rearIns = getInsFromOneEnd(false, idx + refSegments.size() - 1, insertionPos, + anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true); + if (rearIns != null) result.add(rearIns); + } + } + + private void extractInversions(final ReferenceMultiSource reference, final List refSegmentIntervals, + final Set presentSegments, final List invertedSegments, + final List result) { + final List inversions = + invertedSegments.stream() + // large enough; in addition, if both as-is and inverted versions exist, treat as insertions instead of inversions: unlike 1-segment calls, where we don't have consistency problems + .filter(i -> refSegmentIntervals.get(i - 1).size() > EVENT_SIZE_THRESHOLD && (!presentSegments.contains(i))) + .map(i -> { + final SimpleInterval invertedSegment = refSegmentIntervals.get(i - 1); + final byte[] ref = getReferenceBases(makeOneBpInterval(invertedSegment.getContig(), invertedSegment.getStart()), reference); + final Allele refAllele = Allele.create(ref, true); + return makeInversion(invertedSegment, refAllele); + }) + .collect(Collectors.toList()); + result.addAll(inversions); + } + + private void extractDeletions(final ReferenceMultiSource reference, final Set missingSegments, + final List result) { + final List deletions = compactifyMissingSegments(missingSegments).stream() + .filter(gone -> gone.size() > EVENT_SIZE_THRESHOLD) // large enough + .map(gone -> { + final byte[] ref = getReferenceBases(makeOneBpInterval(gone.getContig(), gone.getStart()), reference); + final Allele refAllele = Allele.create(ref, true); + return makeDeletion(new 
SimpleInterval(gone.getContig(), gone.getStart(), gone.getEnd() - 1), refAllele); + }) + .collect(Collectors.toList()); + result.addAll(deletions); + } + /** + * Compactify missingSegments for case when two neighboring segments are both gone, to avoid cases when + * 1) neither segment is large enough + * 2) calling two small deletions while one should call a big deletion + */ + @VisibleForTesting + static List compactifyMissingSegments(final Set missingSegments) { + if (missingSegments.size() == 1) + return Collections.singletonList(missingSegments.iterator().next()); + + // first sort + final List sortedMissingSegments = missingSegments.stream() + .sorted(Comparator.comparing(SimpleInterval::getStart)) // two segments will NEVER have the same start or overlap on more than one base + .collect(Collectors.toList()); + final List result = new ArrayList<>(missingSegments.size()); + Iterator iterator = sortedMissingSegments.iterator(); + SimpleInterval current = iterator.next(); + while (iterator.hasNext()) { + SimpleInterval next = iterator.next(); + if (current.overlapsWithMargin(next, 1)) { + current = new SimpleInterval(current.getContig(), current.getStart(), next.getEnd()); + } else { + result.add(current); + current = next; + } + } + result.add(current); + return result; + } + + private void extractFrontAndRearInsertions(final VariantContext complexVC, final List refSegmentIntervals, + final List altArrangement, + final List result) { + final byte[] refBases = complexVC.getReference().getBases(); + final List refSegmentLengths = refSegmentIntervals.stream().map(SimpleInterval::size).collect(Collectors.toList()); + // index pointing to first appearance of ref segment (inverted or not) in altArrangement, from either side + int firstRefSegmentIdx = 0; // first front + for (final String description : altArrangement) { + if ( descriptionIndicatesInsertion(description)) { + ++firstRefSegmentIdx; + } else { + break; + } + } + if (firstRefSegmentIdx > 0) { + final Allele 
anchorBaseRefAlleleFront = Allele.create(refBases[0], true); + final SimpleInterval startAndStop = makeOneBpInterval(complexVC.getContig(), complexVC.getStart()); + final VariantContextBuilder frontIns = getInsFromOneEnd(true, firstRefSegmentIdx, startAndStop, anchorBaseRefAlleleFront, refSegmentLengths, altArrangement, true); + if (frontIns != null) result.add( frontIns ); + } + + firstRefSegmentIdx = altArrangement.size() - 1; // then end + for (int i = altArrangement.size() - 1; i > -1 ; --i) { + if ( descriptionIndicatesInsertion(altArrangement.get(i))) { + --firstRefSegmentIdx; + } else { + break; + } + } + + if (firstRefSegmentIdx != altArrangement.size() - 1) { + final Allele anchorBaseRefAlleleRear = Allele.create(refBases[refBases.length - 2], true); + final SimpleInterval startAndStop = makeOneBpInterval(complexVC.getContig(), complexVC.getEnd()); + final VariantContextBuilder rearIns = getInsFromOneEnd(false, firstRefSegmentIdx, startAndStop, anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true); + if (rearIns != null) result.add( rearIns ); + } + } + + @VisibleForTesting + static boolean descriptionIndicatesInsertion(final String description) { + if (description.startsWith(CpxVariantCanonicalRepresentation.UNMAPPED_INSERTION)) + return true; + return !NumberUtils.isCreatable(description); // "(-)?[0-9]+" is describing segments, we don't count them as insertions + } + } + + //================================================================================================================== + + /** + * Reason for requesting increment by 1 via {@code shouldIncreaseInsLenByOne}: + * when getting insertion length from either end, + * there could be, but not always, a one-bp overlap between the head alignment and + * the next alignment that continues the flow (which is not necessarily the 2nd alignment); + * so when there's such 1-bp overlap, the insertion length should count this 1-bp overlap. 
+ * todo: currently all known calling code provides {@code true}, which is technically wrong, but we need alignment information to tell when to provide true/false + * + * @return {@code null} if the inserted sequence from the requested end is not over {@link #EVENT_SIZE_THRESHOLD} + */ + @VisibleForTesting + static VariantContextBuilder getInsFromOneEnd(final boolean fromFront, final int idxFirstMatch, + final SimpleInterval insertionStartAndStop, final Allele anchorBaseRefAllele, + final List refSegmentLengths, final List altArrangement, + final boolean shouldIncreaseInsLenByOne) { + int insLen = 0; + if (fromFront) { + for (int i = 0; i < idxFirstMatch; ++i) { + insLen += getInsLen( altArrangement.get(i), refSegmentLengths ); + } + } else { + for (int i = idxFirstMatch + 1; i < altArrangement.size(); ++i) { + insLen += getInsLen( altArrangement.get(i), refSegmentLengths ); + } + } + + if (shouldIncreaseInsLenByOne) ++insLen; + + if (insLen > EVENT_SIZE_THRESHOLD) + return makeInsertion(insertionStartAndStop.getContig(), insertionStartAndStop.getStart(), insertionStartAndStop.getEnd(), insLen, anchorBaseRefAllele); + else + return null; + } + + @VisibleForTesting + static int getInsLen(final String description, final List refSegmentLengths) { + if (description.startsWith(CpxVariantCanonicalRepresentation.UNMAPPED_INSERTION)) { + return Integer.valueOf(description.substring(CpxVariantCanonicalRepresentation.UNMAPPED_INSERTION.length() + 1)); + } else if ( NumberUtils.isCreatable(description) ){ + final int offset = description.startsWith("-") ? 1 : 0; + return refSegmentLengths.get( Integer.valueOf(description.substring(offset)) - 1); + } else { + final int offset = description.startsWith("-") ? 
1 : 0; + return new SimpleInterval(description.substring(offset)).size(); + } + } + + /** + * Retrieves, from the provided {@code refSegments} and {@code altArrangements}, the reference segments described in + * {@link org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants#CPX_SV_REF_SEGMENTS}, that are + * a) absent + * b) present as is, i.e. not inverted + * c) inverted + */ + @VisibleForTesting + static Tuple3, Set, List> getMissingAndPresentAndInvertedSegments(final List refSegments, + final List altArrangements ) { + + final List invertedSegments = new ArrayList<>(); + final Set presentSegments = new TreeSet<>(); + altArrangements + .forEach(s -> { + if ( s.startsWith("-") && ( !s.contains(":") )) { // some segment inverted + invertedSegments.add( Integer.valueOf(s.substring(1)) ); + } + if ( !s.contains(":") && !s.startsWith(CpxVariantCanonicalRepresentation.UNMAPPED_INSERTION) && !s.startsWith("-") ) { // a ref segment, but not inverted + presentSegments.add(Integer.valueOf(s)); + } + }); + + final Set missingSegments = IntStream.rangeClosed(1, refSegments.size()).boxed() + .filter(i -> !presentSegments.contains(i) && !invertedSegments.contains(i)) + .map(i -> refSegments.get(i-1)) + .collect(Collectors.toSet()); + + return new Tuple3<>(missingSegments, presentSegments, invertedSegments); + } + + // boiler-plate code block ========================================================================================= + + private static Allele getAnchorBaseRefAllele(final String chr, final int pos, final ReferenceMultiSource reference) { + return Allele.create(getReferenceBases(makeOneBpInterval(chr, pos), reference), true); + } + + // try not to have many try's + static byte[] getReferenceBases(final SimpleInterval interval, final ReferenceMultiSource reference) { + try { + return reference.getReferenceBases(interval).getBases(); + } catch (final IOException ioex) { + throw new GATKException("Failed to extract reference bases on:" + interval, ioex); + } + } + + private static final 
Allele altSymbAlleleDel = Allele.create(SimpleSVType.createBracketedSymbAlleleString(SYMB_ALT_ALLELE_DEL)); + private static final Allele altSymbAlleleIns = Allele.create(SimpleSVType.createBracketedSymbAlleleString(SYMB_ALT_ALLELE_INS)); + private static final Allele altSymbAlleleInv = Allele.create(SimpleSVType.createBracketedSymbAlleleString(SYMB_ALT_ALLELE_INV)); + + /** + * Note that {@code delRange} is expected to be pre-processed to be VCF-spec compatible, + * e.g. if chr1:101-200 is deleted, then {@code delRange} should be chr1:100-200 + * @param delRange + */ + @VisibleForTesting + static VariantContextBuilder makeDeletion(final SimpleInterval delRange, final Allele refAllele) { + + return new VariantContextBuilder() + .chr(delRange.getContig()).start(delRange.getStart()).stop(delRange.getEnd()) + .alleles(Arrays.asList(refAllele, altSymbAlleleDel)) + .id(makeID(SimpleSVType.TYPES.DEL.name(), delRange.getContig(), delRange.getStart(), delRange.getEnd())) + .attribute(VCFConstants.END_KEY, delRange.getEnd()) + .attribute(SVLEN, - delRange.size() + 1) + .attribute(SVTYPE, SimpleSVType.TYPES.DEL.name()); + } + + @VisibleForTesting + static VariantContextBuilder makeInsertion(final String chr, final int pos, final int end, final int svLen, + final Allele refAllele) { + + return new VariantContextBuilder().chr(chr).start(pos).stop(end) + .alleles(Arrays.asList(refAllele, altSymbAlleleIns)) + .id(makeID(SimpleSVType.TYPES.INS.name(), chr, pos, end)) + .attribute(VCFConstants.END_KEY, end) + .attribute(SVLEN, svLen) + .attribute(SVTYPE, SimpleSVType.TYPES.INS.name()); + } + + @VisibleForTesting + static VariantContextBuilder makeInversion(final SimpleInterval invertedRegion, final Allele refAllele) { + return new VariantContextBuilder() + .chr(invertedRegion.getContig()).start(invertedRegion.getStart() - 1).stop(invertedRegion.getEnd()) // TODO: 5/2/18 VCF spec doesn't request left shift by 1 for inversion POS + .alleles(Arrays.asList(refAllele, altSymbAlleleInv)) + 
.id(makeID(SimpleSVType.TYPES.INV.name(), invertedRegion.getContig(), invertedRegion.getStart() - 1, invertedRegion.getEnd())) + .attribute(VCFConstants.END_KEY, invertedRegion.getEnd()) + .attribute(SVLEN, 0) // TODO: 5/2/18 this is following VCF spec, + .attribute(SVTYPE, SimpleSVType.TYPES.INV.name()); + } +} From 0db0dae112f8a85a78ec96bd20ceb88bcb7de91b Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Thu, 10 May 2018 14:08:49 -0400 Subject: [PATCH 3/4] hookup to SvDiscoverFromLocalAssemblyContigAlignmentsSpark (hence StructuralVariationDiscoveryPipelineSpark as well) --- ...cturalVariationDiscoveryPipelineSpark.java | 4 ++-- ...romLocalAssemblyContigAlignmentsSpark.java | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index b54cb3efaa2..d5409d0b820 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -271,8 +271,8 @@ private static void experimentalInterpretation(final JavaSparkContext ctx, SvDiscoverFromLocalAssemblyContigAlignmentsSpark.AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes = SvDiscoverFromLocalAssemblyContigAlignmentsSpark.preprocess(svDiscoveryInputMetaData, assemblyRawAlignments); - SvDiscoverFromLocalAssemblyContigAlignmentsSpark.dispatchJobs(contigsByPossibleRawTypes, svDiscoveryInputMetaData, - assemblyRawAlignments, true); + SvDiscoverFromLocalAssemblyContigAlignmentsSpark.dispatchJobs(ctx, contigsByPossibleRawTypes, + svDiscoveryInputMetaData, assemblyRawAlignments, true); } private static JavaRDD getContigRawAlignments(final JavaSparkContext ctx, diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java index 8ea3c1db009..417407a3bab 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java @@ -1,10 +1,7 @@ package org.broadinstitute.hellbender.tools.spark.sv.discovery; import com.google.common.annotations.VisibleForTesting; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.*; import htsjdk.samtools.util.SequenceUtil; import htsjdk.variant.variantcontext.VariantContext; import org.apache.logging.log4j.LogManager; @@ -26,6 +23,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryPipelineSpark; import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.*; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.CpxVariantInterpreter; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor; import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SimpleNovelAdjacencyInterpreter; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVFileUtils; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; @@ -151,7 +149,7 @@ protected void runTool(final JavaSparkContext ctx) { final AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes = preprocess(svDiscoveryInputMetaData, assemblyRawAlignments); - dispatchJobs(contigsByPossibleRawTypes, svDiscoveryInputMetaData, assemblyRawAlignments, writeSAMFiles); + dispatchJobs(ctx, 
contigsByPossibleRawTypes, svDiscoveryInputMetaData, assemblyRawAlignments, writeSAMFiles); } //================================================================================================================== @@ -243,7 +241,8 @@ public static AssemblyContigsClassifiedByAlignmentSignatures preprocess(final Sv * {@link AssemblyContigWithFineTunedAlignments.AlignmentSignatureBasicType#UNKNOWN} * currently DO NOT generate any VCF yet. */ - public static void dispatchJobs(final AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes, + public static void dispatchJobs(final JavaSparkContext ctx, + final AssemblyContigsClassifiedByAlignmentSignatures contigsByPossibleRawTypes, final SvDiscoveryInputMetaData svDiscoveryInputMetaData, final JavaRDD assemblyRawAlignments, final boolean writeSAMFiles) { @@ -270,6 +269,16 @@ public static void dispatchJobs(final AssemblyContigsClassifiedByAlignmentSignat contigsByPossibleRawTypes.writeSAMfilesForUnknown(outputPrefixWithSampleName, assemblyRawAlignments, svDiscoveryInputMetaData.getSampleSpecificData().getHeaderBroadcast().getValue()); } + + final JavaRDD complexVariantsRDD = ctx.parallelize(complexVariants); + final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants reInterpretedSimple = + SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariantsRDD, svDiscoveryInputMetaData, assemblyRawAlignments); + final SAMSequenceDictionary refSeqDict = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(); + final Logger toolLogger = svDiscoveryInputMetaData.getToolLogger(); + final String derivedOneSegmentSimpleVCF = outputPrefixWithSampleName + "cpx_reinterpreted_simple_1_seg.vcf"; + final String derivedMultiSegmentSimpleVCF = outputPrefixWithSampleName + "cpx_reinterpreted_simple_multi_seg.vcf"; + SVVCFWriter.writeVCF(reInterpretedSimple.getReInterpretZeroOrOneSegmentCalls(), derivedOneSegmentSimpleVCF, refSeqDict, toolLogger); + 
SVVCFWriter.writeVCF(reInterpretedSimple.getReInterpretMultiSegmentsCalls(), derivedMultiSegmentSimpleVCF, refSeqDict, toolLogger); } //================================================================================================================== From 49de85120ec6ed031bc4bc725dd21b01cff8a125 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Wed, 2 May 2018 21:29:27 -0400 Subject: [PATCH 4/4] (SV) test coverage bump commit: * unit test code * integration test and associated intput & output files --- .../tools/spark/sv/utils/SVVCFWriter.java | 4 +- .../inference/CpxSVInferenceTestUtils.java | 2 +- ...VariantSimpleVariantExtractorUnitTest.java | 745 ++++ ...iantReInterpreterSparkIntegrationTest.java | 145 + ...rSparkIntegrationTest_complex_assembly.bam | 3 + ...rkIntegrationTest_complex_assembly.bam.bai | 3 + ...nterpreterSparkIntegrationTest_complex.vcf | 66 + .../inputs/Homo_sapiens_assembly38.kill.alts | 3341 +++++++++++++++++ .../cpx_reinterpreted_simple_1_seg.vcf | 55 + .../cpx_reinterpreted_simple_multi_seg.vcf | 69 + 10 files changed, 4430 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractorUnitTest.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/CpxVariantReInterpreterSparkIntegrationTest.java create mode 100644 src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam create mode 100644 src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam.bai create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/CpxVariantReInterpreterSparkIntegrationTest_complex.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/Homo_sapiens_assembly38.kill.alts create mode 100644 
src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_1_seg.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_multi_seg.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriter.java index 1a5d4cf3f84..98664d74a27 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVVCFWriter.java @@ -70,8 +70,8 @@ private static void logNumOfVarByTypes(final List variants, fina // sorting these variants must take into account of such complications. // the solution below is hackish @VisibleForTesting - static List sortVariantsByCoordinate(final List variants, - final SAMSequenceDictionary referenceSequenceDictionary) { + public static List sortVariantsByCoordinate(final List variants, + final SAMSequenceDictionary referenceSequenceDictionary) { return variants.stream().sorted((VariantContext v1, VariantContext v2) -> { final int x = IntervalUtils.compareLocatables(v1, v2, referenceSequenceDictionary); if (x == 0) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxSVInferenceTestUtils.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxSVInferenceTestUtils.java index ad60a0a45b9..9fa4f7a9379 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxSVInferenceTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxSVInferenceTestUtils.java @@ -66,7 +66,7 @@ static final class PreprocessedAndAnalysisReadyContigWithExpectedResults { /** * We are having this because it is SV, especially complex ones, are rare and events on chr20 and 21 are not enough. 
*/ - final static SAMSequenceDictionary bareBoneHg38SAMSeqDict; + public final static SAMSequenceDictionary bareBoneHg38SAMSeqDict; static { final List hg38Chromosomes = new ArrayList<>(); final String hg38ChrBareBoneListFile = GATKBaseTest.toolsTestDir + "/spark/sv/utils/hg38ChrBareBone.txt"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractorUnitTest.java new file mode 100644 index 00000000000..f0b7132df4e --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/SegmentedCpxVariantSimpleVariantExtractorUnitTest.java @@ -0,0 +1,745 @@ +package org.broadinstitute.hellbender.tools.spark.sv.discovery.inference; + +import com.google.common.collect.Sets; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFConstants; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType; +import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import scala.Tuple3; + +import java.util.*; +import java.util.stream.Collectors; + +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider.b38_reference_chr20_chr21; +import static 
org.broadinstitute.hellbender.tools.spark.sv.discovery.SVDiscoveryTestUtilsAndCommonDataProvider.b38_seqDict_chr20_chr21; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.CpxVariantCanonicalRepresentation.UNMAPPED_INSERTION; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor.*; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor.MultiSegmentsCpxVariantExtractor.compactifyMissingSegments; +import static org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor.MultiSegmentsCpxVariantExtractor.findAllSegments; +import static org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants.*; + +public class SegmentedCpxVariantSimpleVariantExtractorUnitTest extends GATKBaseTest { + + private static final ZeroAndOneSegmentCpxVariantExtractor zeroAndOneSegmentCpxVariantExtractor = new ZeroAndOneSegmentCpxVariantExtractor(); + private static final MultiSegmentsCpxVariantExtractor multiSegmentsCpxVariantExtractor = new MultiSegmentsCpxVariantExtractor(); + + private List caseForZeroAndOneSegmentCalls() { + final List data = new ArrayList<>(20); + + // NOTE ALL VARIANTS HERE ARE ARTIFICIALLY PUT ON CHR20 AND 21 BECAUSE WE NEED REFERENCE, SO SOME NON-CRITICAL VALUES MAY LOOK NON-SENSE + + // 1. 
zero segment -> insertion + VariantContext complex = makeTestComplexVariant(new SimpleInterval("chr20:51740560-51740561"), 549, + "AT", "AATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGT", + Arrays.asList("asm028558:tig00002","asm028558:tig00003"), Collections.emptyList(), + Arrays.asList("-chr18:11642876-11642927","UINS-496")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeInsertion("chr20",51740560, 51740560, 549, Allele.create("A", true)) + .attribute(EVENT_KEY, "CPX_chr20:51740560-51740561") + .attribute(CONTIG_NAMES, "asm028558:tig00002,asm028558:tig00003").make()) + }); + + // 2. 
one segment -> with inversion + complex = makeTestComplexVariant(new SimpleInterval("chr21:402806-402905"), 0, + "GAGTCTTACTCTATTGGGCAGGCTGGAGTACAGCGGTGAAATCATGGCTCACTGCAGCCTCGATGTCCTGGCCTCAAACCATCCCCCTGCTTCAGCCTCC", "GAGGCTGAAGCAGGGGGATGGTTTGAGGCCAGGACATCGAGGCTGCAGTGAGCCATGATTTCACCGCTGTACTCCAGCCTGCCCAATAGAGTAAGACT", + Collections.singletonList("asm002252:tig00003"), Collections.singletonList(new SimpleInterval("chr21:402807-402904")), + Collections.singletonList("-1")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeInversion(new SimpleInterval("chr21:402807-402904"), Allele.create("N", true)) // THE REF ALLELE N HERE IS BECAUSE OF COORDINATE MESSING on TEST DATA MENTIONED ABOVE + .attribute(EVENT_KEY, "CPX_chr21:402806-402905") + .attribute(CONTIG_NAMES, "asm002252:tig00003").make()) + }); + + // 3. one segment -> when deletion is not allowed + + // 3.0 -> new material is not long enough (49 bp, boundary cases) + complex = makeTestComplexVariant(new SimpleInterval("chr21:402806-402905"), 0, + "GAGTCTTACTCTATTGGGCAGGCTGGAGTACAGCGGTGAAATCATGGCTCACTGCAGCCTCGATGTCCTGGCCTCAAACCATCCCCCTGCTTCAGCCTCC", "AAAAAAAAAAAAAAAAAAAAAAAAGAGTCTTACTCTATTGGGCAGGCTGGAGTACAGCGGTGAAATCATGGCTCACTGCAGCCTCGATGTCCTGGCCTCAAACCATCCCCCTGCTTCAGCCTCCAAAAAAAAAAAAAAAAAAAAAAAAA", + Collections.singletonList("asm002252:tig00003"), Collections.singletonList(new SimpleInterval("chr21:402807-402904")), + Arrays.asList("UINS-24", "1", "UINS-25")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.emptyList() + }); + + // 3.1 -> ...., 1 + complex = makeTestComplexVariant(new SimpleInterval("chr20:18675721-18675877"), 408, + "TATGTGTATATTTACACACATATATATGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACACATATATGTAAATATACCTATGTGTATATTTACACATATATATGTAAATATACCTATGTGT", 
"TATGTGTATATTTACACACATATATATGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACATATATATGTAAATATACCTATGTGTATGTTTACACATATATATGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATGTAAATATACCTATGTGTATGTTTACACATATATGTGTAAATATACCGATGTGTATGTTTACACATATATGTGTAAATATACCTATGTGTATGTTTACACATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTGTGTTTACACATATATATGTGTAAATATACCTATGTGTGTGTTTACACATATATATGTAAATATACCTATGTGT", + Collections.singletonList("asm028012:tig00004"), Collections.singletonList(new SimpleInterval("chr20:18675721-18675877")), + Arrays.asList("1","UINS-28","1","UINS-64","1")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeInsertion("chr20", 18675720, 18675720, 408, Allele.create("A", true)) + .attribute(EVENT_KEY, "CPX_chr20:18675721-18675877").attribute(CONTIG_NAMES, "asm028012:tig00004").make()) + }); + + // 3.2 -> 1, ..... 
+ complex = makeTestComplexVariant(new SimpleInterval("chr20:64096905-64097041"), 318, + "CCACCATCATCACCATCACCACTATCACCACCACCACCATCATTACCATCATCATCACGACCATCACCACCATCATCACCATCACCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACC", "CCACCATCATCACCATCACCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACTGTCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACCGCCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACCATCATCACAATCATCACCGTCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACTATCATCACAGTCATCACCGTCACCATCATCACCATCCTCACTATCACCACCACTACCATCATCATCACATTCATCATCACTATTACCATCATCATCACCACCATCACCATCACTATCACCACCATCATTACATTTGTCACCATCACCACCATTATCACCATCACCGCTATCACCACCACCACCGTC", + Collections.singletonList("asm028821:tig00001"), Collections.singletonList(new SimpleInterval("chr20:64096905-64097041")), + Arrays.asList("1","1","UINS-166")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeInsertion("chr20", 64097041, 64097041, 318, Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr20:64096905-64097041").attribute(CONTIG_NAMES, "asm028821:tig00001").make()) + }); + + // 3.3 -> ...., 1, .... 
+ complex = makeTestComplexVariant(new SimpleInterval("chr20:51740560-51741035"), 599, + "ATTTTGTGTTGTTGTTTTTGTTTTTTGAGACAAGGTCTCATTCTGTCACCCAGGCAGGACTGTGGTGGCACCATCATGGCTCAGCGCAGCCTCCTTTTCCCCAGGCTCAAGTGATCCTCTTGCCTCAGCCTCCCACGTGGCTGGGACTACAGGTGTGTACCACCACTCCCGGATAATTTTTTTTATTTTTTATTTTTAGTAAAGACAGTCTCACTATGTTGCCCAGGCTGGTCTCCAACTCCTGGTCTCAAGCAATCCTCCCAGTTCAGCCTCTCAAAGTGCTGGGATTACAGATGTGAGCCACAATACCCGGCCCCAATTCTAATGTTTAAAGAGTACAGTCTACACCTTAAAGCCTGCATTTTATCATCCTGTCCTCACTGCTCTGACTTCTTTACAGTTGTGCTGTCCACCTTGGCGGCTTCTACCACATGTGGCTATTTTAAGTTTCAATTAATTAAAATTAAATTTTAATT", "AATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGTTTTGTGTTGTTGTTTTTGTTTTTTGAGACAAGGTCTCATTCTGTCACCCAGGCAGGACTGTGGTGGCACCATCATGGCTCAGCGCAGCCTCCTTTTCCCCAGGCTCAAGTGATCCTCTTGCCTCAGCCTCCCACGTGGCTGGGACTACAGGTGTGTACCACCACTCCCGGATAATTTTTTTTATTTTTTATTTTTAGTAAAGACAGTCTCACTATGTTGCCCAGGCTGGTCTCCAACTCCTGGTCTCAAGCAATCCTCCCAGTTCAGCCTCTCAAAGTGCTGGGATTACAGATGTGAGCCACAATACCCGGCCCCAATTCTAATGTTTAAAGAGTACAGTCTACACCTTAAAGCCTGCATTTTATCATCCTGTCCTCACTGCTCTGACTTCTTTACAGTTGTGCTGTCCACCTTGGCGGCTTCTACCACATGTGGCTATTTTAAGTTTCAATTAATTAAAATTAAATTTTAATTTAATTAATTAAAAATAAATTTTAATTAATTAATTAAAAATAAATTTTAAT", + Arrays.asList("asm028558:tig00000", "asm028558:tig00001"), Collections.singletonList(new SimpleInterval("chr20:51740561-51741034")), + Arrays.asList("-chr18:11642876-11642927","UINS-496","1","UINS-49")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Arrays.asList(makeInsertion("chr20", 
51740560, 51740560, 549, Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr20:51740560-51741035").attribute(CONTIG_NAMES, "asm028558:tig00000,asm028558:tig00001").make(), + makeInsertion("chr20", 51741034, 51741034, 50, Allele.create("T", true)).attribute(EVENT_KEY, "CPX_chr20:51740560-51741035").attribute(CONTIG_NAMES, "asm028558:tig00000,asm028558:tig00001").make()) + }); + + // 4. one segment -> whenNoInvAndNoAsIsAppearance + + // 4.1 -> deletion but no insertion + complex = makeTestComplexVariant(new SimpleInterval("chr20:20269131-20269199"), -34, + "ATATATATATATATATACACACACACACACACACATACATATATGTATATACACACACATATATACATA", "ACACACACACACACACACACACACACACACACACA", + Collections.singletonList("asm028026:tig00000"), Collections.singletonList(new SimpleInterval("chr20:20269131-20269199")), + Collections.singletonList("-chrX:137700299-137700331")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeDeletion(new SimpleInterval("chr20:20269131-20269198"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr20:20269131-20269199").attribute(CONTIG_NAMES, "asm028026:tig00000").make()) + }); + // 4.2 -> deletion and insertion + complex = makeTestComplexVariant(new SimpleInterval("chr20:54849491-54849615"), 15, + "CAAATCTCATGTGAAATGTATCCCCAGTGTGGAGGGGGCAGATCCTCATAATGGCTTGGGCCCTTCCATGGTAATAGTGAGTCTTGCTCTGTAGTTCATAGAGAGCTGATTGTTAAAGGAGTCTG", "CCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAATGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGG", + Collections.singletonList("asm028586:tig00000"), Collections.singletonList(new SimpleInterval("chr20:54849491-54849615")), + Arrays.asList("UINS-36","-chr14:58474127-58474172","UINS-54")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Arrays.asList(makeDeletion(new SimpleInterval("chr20:54849491-54849614"), Allele.create("C", true)).attribute(EVENT_KEY, 
"CPX_chr20:54849491-54849615").attribute(CONTIG_NAMES, "asm028586:tig00000").make(), + makeInsertion("chr20", 54849491, 54849491, 140, Allele.create("c", true)).attribute(EVENT_KEY, "CPX_chr20:54849491-54849615").attribute(CONTIG_NAMES, "asm028586:tig00000").make()) + }); + + // 4.3 -> fat insertion + complex = makeTestComplexVariant(new SimpleInterval("chr20:12558793-12558810"), 133, + "AAAAAAAAAAAAAAAAAA", "AGACAAAGAAACAAACAAACAAAACAAAACTATATATATATATATATACACACACACACACACACACACATTATTAAAATTCAGATTTAAATAAACTGACTATAAAAAAGTACTTTTGAAACAAAAACTTTAATCATGATTATATATATTA", + Collections.singletonList("asm027960:tig00003"), Collections.singletonList(new SimpleInterval("chr20:12558793-12558810")), + Arrays.asList("-chrX:99014092-99014129","UINS-101")); + data.add(new Object[]{complex, b38_reference_chr20_chr21, zeroAndOneSegmentCpxVariantExtractor, + Collections.singletonList(makeInsertion("chr20", 12558793, 12558809, 133, Allele.create("AAAAAAAAAAAAAAAAA", true)).attribute(EVENT_KEY, "CPX_chr20:12558793-12558810").attribute(CONTIG_NAMES, "asm027960:tig00003").make()) + }); + + return data; + } + private List caseForMultiSegmentsCalls() { + final List data = new ArrayList<>(20); + + // case 1: long stretch + + // case 1.1: front insertion only + SimpleInterval affectedInterval = new SimpleInterval("chr21:21264944-21265096"); + int svLen = 215; + String refAllele = "TATATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTCTGTGTATATATGTACACATATATACTATATATGTGTATGTGTATATATATACACACATATATTATATAT"; + String altSeq = "TATATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATATGTATATATACACATATATATTATATATATATGTGTCTGTATATATATACACATATATATTATATATATGTGTCTGTGTATATATATACACATATATATGTGTCTGTGTATATATGTACACATATATACTATATATGTGTATGTGTATATATATACACACATATATTATATAT"; + List ctgNames = Arrays.asList("asm029034:tig00000","asm029034:tig00001"); + 
List refSegments = Arrays.asList(new SimpleInterval("chr21:21264944-21264988"), new SimpleInterval("chr21:21264988-21265052"), new SimpleInterval("chr21:21265052-21265096")); + List altArrangements = Arrays.asList("1","2","3","2","1","2","3"); + VariantContext complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + List expectedSimple = Collections.singletonList(makeInsertion("chr21", 21264943, 21264943, 221, Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr21:21264944-21265096").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 1.2: rear insertion only + affectedInterval = new SimpleInterval("chr20:61919906-61920109"); + svLen = 541; + refAllele = "TCGTGATTATGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGCGGTGATTGCATGGAAGTGTGGTGTCACAGTGATTGCGTGGAAGCGTGTCGTGATTGTGTGGAAGCATGGTATCGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCATGTTGTGATTG"; + altSeq = "TCGTGATTATGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGCGGTGATTGCATGGAAGTGTGGTGTCACAGTGATTGCGTGGAAGCGTGTCGTGATTGTGTGGAAGCATGGTATCGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCATGTTGTGATTGTGTGGAAGCGTGGTGTGATTGTGTGGAAGCATGGTATCGTGATTGTGGAAGCGTGGTATCGCGGCGATTGTGTGGAAGCGTGGTGTCGCAGTGATTGCGTGGAAGCATGTTGTGATTGTGTGGAAGCGTGGTATCGTGATTGTGTGGAAGCATGGTGTCGTGATTGTGTGGAAGCATGTCGTGATTGTGTGGAAGTGTGATGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATTGGAAGTGTGGTGTCACGCTGATTGCATGGAAGTGTGTTGTGATTGTGTGGAAGCGTGATATCGCAGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATCGGAAGCGTGGTGTTGCGGTGATTGCATGGAAGCATGTTGTGATTGTGTGGAAGCATGGTATCGTGATTGTCTGGAAGCATGGTGTCATGGTGATTGGAAGTGTGTCGTGATTG"; + ctgNames = Arrays.asList("asm028707:tig00000"); + refSegments = Arrays.asList(new SimpleInterval("chr20:61919906-61919908"), new SimpleInterval("chr20:61919908-61920054"), new 
SimpleInterval("chr20:61920054-61920109")); + altArrangements = Arrays.asList("1","2","3","UINS-177","1","2","2","3"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Collections.singletonList(makeInsertion("chr20", 61920109, 61920109, 531, Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr20:61919906-61920109").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 1.3: front and rear insertion + affectedInterval = new SimpleInterval("chr20:38653054-38653283"); + svLen = 485; + refAllele = "TGGTGGTGGTGGTGATGGAAATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGATGGTGGTGGTGGTGGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGATGGTGGTGGTGGTGGTGATGATGGTGATG"; + altSeq = "TGGTGGTGGTGGTGATGGAAATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGATGGTGGTGGTGGTGGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGGTGATAATGATGGTAGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGGTGATGGAAATGATGATGATGTTAATTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGATGGTGATGATGATTATGATGGTGGTGGTGGTGGTGGTGGTGCTGGTGATAGTGGTGGTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGGTGTTGGTGGTGCTGGTGATGATAATCATGCTGGTGGTGGTGGCGTTGATGATGGTGACAGTAGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGATGGTGGTGGTGGTGGTGATGATGGTGATG"; + ctgNames = Arrays.asList("asm028418:tig00000"); + refSegments = Arrays.asList(new SimpleInterval("chr20:38653054-38653113"), new SimpleInterval("chr20:38653113-38653145"), new SimpleInterval("chr20:38653145-38653179"), new SimpleInterval("chr20:38653179-38653273"), new SimpleInterval("chr20:38653273-38653283")); + 
altArrangements = Arrays.asList("1","2","3","4","3","1","2","3","4","5","2","3","4","5"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Arrays.asList(makeInsertion("chr20", 38653053, 38653053, 259, Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr20:38653054-38653283").attribute(CONTIG_NAMES, ctgNames).make(), + makeInsertion("chr20", 38653283, 38653283, 175, Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr20:38653054-38653283").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 2: possibly inversion + + // case 2.1: both as-is and inverted, hence no inversion + affectedInterval = new SimpleInterval("chr20:23122561-23122996"); + svLen = -293; + refAllele = "CTGTCATGTCACCGTGTGGGAGGGCTTGCAGGTGAAGTGGTCTGGGAGGGGTCCCCCAGACAAAGCCAAGGTTCTGAGAGTTGGCCCGAACACTGCTGGATTCCACGGCGGGGGCTTGGGACAGCCGGACTCAGAGACAGTAGGAGGTGACAATGTATATACATCACAATGATCACAATATAGCCCCACACGTGCCTTCCCCACCAACAGTCAGCTTGCCATGTCCCTTCCCCGACATCCCCATCTTAGTCCCAACACAGAGGCTCCCTCTGCCCTGTGTCTGTTTAGCCAGGATGCCACGGTTCTTCCCCCTTCTCCCATGTCCTCCTCCACACACCCTTCAAGGGCAACCCCAGAGATTTCTCCTCCATGATTTCTAAAGTCAGTGGTAAATCTGTTCCTCTTTAAAATCTCATCACACTGTTCACCACTTCCC"; + altSeq = "CTGTCATGTCACCGTGTGGGAGGGCTTGCAGGTGAAGTGGTCTGGGAGGGGTCCCCCAGACAAAGCCAAGGTTCTGAGAGTTGGCCCGAACACTGCTGGATTCCACTTCACCTGCAAGCCCTCCCACACGGTGACATGACAGC"; + ctgNames = Arrays.asList("asm028059:tig00000","asm028059:tig00001"); + refSegments = Arrays.asList(new SimpleInterval("chr20:23122561-23122596"), new SimpleInterval("chr20:23122596-23122666"), new SimpleInterval("chr20:23122666-23122996")); + altArrangements = Arrays.asList("1","2","-1"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Collections.singletonList(makeDeletion(new 
SimpleInterval("chr20:23122666-23122995"), Allele.create("C", true)).attribute(EVENT_KEY, "CPX_chr20:23122561-23122996").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 2.2: no as-is but with inverted, but too short (coordinate and allele massage, original event CPX_chr20:34732145-34733344) + affectedInterval = new SimpleInterval("chr20:34732145-34733344"); + svLen = -1139; + refAllele = "GCTTCATTTTTTTTCATTTGTCCAACAGGAATAAAAATATCTGCTCTCCTGACTTCCTGTGGTTATTGCAAGAAGCAAATAAGGCAATTGCTGTGAAATGATTTGGAAAATAAGCATCCTCCAAATGCAAAGAAGTGTTATTATTTAACAGAGTTTTCTGACTCTAGTTAGGAACTCATCCCATTGACTTTAGAATTGACATGAAAATTTATGCTTATATCCCTGCCTGTAAATAATACTTACTCTTTTGATCCATTTAAATTTTAACATAATCTCAAACTTACAGATGAGTCGCAAAAATGGTACCTTTACCTAGCTTTCCCACGTGTTTATATTGTGCCTAATTTTGTATATATTTCTGAATTATTCAAGAATAAACTGGTGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGTGGATCACCTGAGGTCAGGAGTTAGAGACCAGCATGATCCACATGGTGAAAACCCGCCTCTACTAACAATACAAAATTAGCTGGACGTGGTGGCACATGTCGGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGAGGCAGAGGTTGCAGTGAGCCAAGATCATGCCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCGTATCAAAAAAAAAAGAGTAAACTGGAGATAATGTGTGCTTTACCAGTCCACAGGGAGTTCAACCCTGTTAACATCTTGATTTCAGCCCAGTGAAACTGGCTTCATCCTCCTGACCTTCAGAGCGATAAGAAAATAAATGTGTGTTATTTTAAGCCACCAAATTTGTGGTAATTCACCATAGCAGCCGTAGGAAACGAATATAGTCCATAATCAAATTTTGGCAATTGTCTAAATAGTATTCATTTAACCCATTTCAGTTCTCAAGATAATTTTCAATTCAGGATCATATTTTTTCAATTCAGGATCATGTGTTTTTAGTCTAATCTGGAACAGTTCTCCAGCCTTACTTTGTCTTTCTTGACTTGGACATTTTTGGAAAGTACAGCTATTGGCCACATGTGGTGCCTCATGCCTGTAATCTCAATACTTTGGGAGACTGAGGAGGGAGGATCACCTGAGGCTAGGAGTTCAAGACCAGCCTGGCCAACGTGAGGAACCCCCCCGTCTCTACTAAAAATACTCAAGATCCACCTGGACAA"; + altSeq = "GTTGTCCAGGTGGATCTTGAGTATTTTTAGTAGAGACGGGGGGTTCAATTAACTCTTCCAA"; + ctgNames = Arrays.asList("asm010456:tig00000"); + refSegments = Arrays.asList(new SimpleInterval("chr20:34732145-34733303"), new SimpleInterval("chr20:34733303-34733342"), new 
SimpleInterval("chr20:34733342-34733344")); + altArrangements = Arrays.asList("-3","-2","UINS-14","3"); // segment 1 deleted, segment 2 appear inverted but length too short + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Collections.singletonList(makeDeletion(new SimpleInterval("chr20:34732145-34733302"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr20:34732145-34733344").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 2.3: no as-is but with inverted, and inverted sequence long enough + affectedInterval = new SimpleInterval("chr21:26001843-26002386"); + svLen = -1; + refAllele = "ATCACATCTAGGCATGGCTTTGAGCTTCACAGGCCAGAAGCTGCTCTGTCAGTAACTCAGAGGGGATATGGTCCCTAGACCCAATTCACTTTTAATAAATCAGGGTGCCAGTCCTTTAATTTTGGAAGATAAATAAAATTGCGAAGTAGATGGGATACTTACGTCAACAAGTTGGTTGGGTTTCATCCTGTAAGAGTATTGTGTACAGAAATCCAATTTGCTTTGGGAGCGTGGACTTTGAAAGCAGCAGTACTGAGTGAGATGGCCTTAATGATGGATGATAATGAGGTTGATGCTTTCGTAGCCACGTTTCCATCTGAAAACCACGTGAAGAGTGTCAGATCATCTTCATGTCCGTTGCATGGCGCATTTCTCCTGCGGAGTGTTTGACACCTTCTGGGCTTAGTGCTGACTGTCCCTCCTGTTGACATCATCGTGATGGTAGCCACCTCTGGTTTTACCAGTACTTTATTGCATCTACTGAAAGAGCAACGTGTATAGGGAAGTAAAACAATAGTTAGTCCCCATTTGTATTGACATTGTG"; + altSeq = "ACAATGTCAATACAAATGGGGACTAACTATTGTTTTACTTCCCTATACACGTTGCTCTTTCAGTAGATGCAATAAAGTACTGGTAAAACCAGAGGTGGCTACCATCACGATGATGTCAACAGGAGGGACAGTCAGCACTAAGCCCAGAAGGTGTCAAACACTCCGCAGGAGAAATGCGCCATGCAACGGACATGAAGATGATCTGACACTCTTCACGTGGTTTTCAGATGGAAACGTGGCTACGAAAGCATCAACCTCATTATCATCCATCATTAAGGCCATCTCACTCAGTACTGCTGCTTTCAAAGTCCACGCTCCCAAAGCAAATTGGATTTCTGTACACAATACTCTTACAGGATGAAACCCAACCAACTTGTTGACGTAAGTATCCCATCTACTTCGCAATTTTATTTATCTTCCAAAATTAAAGGACTGGCACCCTGATTTATTAAAAGTGAATTGGTTCTAGGGACCATATCCCCTCTGAGTTACTGACAGAGCAGCTTCTGGCCTGTGAAGCTCAAAGCCATGCCTAGATGTGAG"; + ctgNames = Arrays.asList("asm029075:tig00000"); + refSegments = Arrays.asList(new 
SimpleInterval("chr21:26001844-26002384"), new SimpleInterval("chr21:26002384-26002386")); + altArrangements = Arrays.asList("-1"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Collections.singletonList(makeInversion(new SimpleInterval("chr21:26001844-26002384"), Allele.create("T", true)).attribute(EVENT_KEY, "CPX_chr21:26001843-26002386").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 3: possibly deletion + + // case 3.1: deleted range too short + + affectedInterval = new SimpleInterval("chr21:23428920-23429023"); + svLen = 131; + refAllele = "TTTATATAAATATATATAAATATATAATATATAATAATATAATATAATATATATATAATATAATATAATATAATATATAATATATATTACATAATATATTATAT"; + altSeq = "TATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATAATATATATAATATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATAT"; + ctgNames = Arrays.asList("asm029052:tig00000","asm029052:tig00001"); + refSegments = Arrays.asList(new SimpleInterval("chr21:23428920-23428968"), new SimpleInterval("chr21:23428968-23428998"), new SimpleInterval("chr21:23428998-23429023")); + altArrangements = Arrays.asList("UINS-84","2","3","UINS-5","2","2","3"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Collections.singletonList(makeInsertion("chr21", 23428920, 23428920, 85, Allele.create("T", true)).attribute(EVENT_KEY, "CPX_chr21:23428920-23429023").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 3.2: deleted range long enough (tested together with 2.1) + + // case 4: not long stretch but front 
and rear insertion possible + + // case 4.1: insertion length not big enough (coordinate and allele massage, original event CPX_chr10:13062977-13063278) + affectedInterval = new SimpleInterval("chr20:13062977-13063278"); + svLen = 72; + refAllele = "CTGACCAATCAGCACCCTTGGCTCACTGGCTTACCGATTTCATCTCTGACCAATCAGCACTCCTGGCTCACTGGCTTACCAATTTCATCTCTGACCAATCAGCACTCCTGGCTCACTGGCTTACCCCCACCCACCAAGTTATTTTTAAAAACTTTGCTCCCCGAATGTTCGGGGAGACTGATTTGAGTAATAACAAAACCCTGGTCTCCCACACAGCCGGCTCTGTGTGAATTACTCTTTCTCTATTGCAGTTCCCCTGTCTTGATTTATCAGCTCTGTCTAGGCAGTGGGCAAGTTGAACC"; + altSeq = "CTGACCAATCAGCACCCTTGGCTCACTGGCTTACCGATTTCATCTCTGACCAATCAGCACTACTTGCCCACTGCCTAGACAGAGCTGATAAATCAAGACAGGGGAACTGCAATAGAGAAAGAGTAATTCACACAGAGCCGGCTGTGTGGGAGACCAGGGTTTTGTTATTACTCAAATCAGTCTCCCCGAACATTCGGGGAGCAAAGTTTTTAAAAATAACTTGGTGGGTGGGGGGTAAGCCAGTGAGCCAGGAGTGCTGATTGGTCAGAGATGAAATTGGTAAGCCAGTGATCCAGGAGTGCTGATTGGTCAGAGATGAAATCGGTAAGCCAGTGAGCCAGGAGTGCTGATTGGTCAGCACCCTTGGTAACCAC"; + ctgNames = Arrays.asList("asm016524:tig00000"); + refSegments = Arrays.asList(new SimpleInterval("chr20:13062977-13063037"), new SimpleInterval("chr20:13063037-13063272"), new SimpleInterval("chr20:13063272-13063278")); + altArrangements = Arrays.asList("1","-2","-1","UINS-14"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Arrays.asList(makeInversion(new SimpleInterval("chr20:13063037-13063272"), Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr20:13062977-13063278").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 4.2: front insertion only (tested together with 3.1) + + // case 4.3: rear insertion only (coordinate and allele massage, original event CPX_chr22:36680290-36680686) + affectedInterval = new SimpleInterval("chr21:36680290-36680686"); + svLen = 318; + refAllele = 
"TTCATGATCACTGCCACCTGCATCATGGCTATAATTACCACCGCCATCACCATCATCACTACCACCATCATCACCATCATCACCATTACCACCACCATCACTGCCACCATCACTACCACCATCATCACCATCATCACCACCACCACCATCACCACCATCACCATCACCACTTTCATCACCACCATCTTTATCACAGTCATTATTACCACCATCAATCATCACCACCTTCATGATCACTGCCACCTGCATCATGGTTACAATTACTACCACCACCATCAGCACCACCTTCATGATCACCACCACCTGCATCATGGCTATAATTACTACCACCATCACCACCACTAACACCACCATCATTATCACCACCATCACCACCATCTTCATGATCACTGCCACC"; + altSeq = "TTCATGATCACTGCCACCTGCATCATGGCTATAATTACCACCGCCATCACCATCATCACTACCACCATCATCACCATCATCACCATTACCACCACCATCACTGCCACCATCACTACCACCATCATCACCATCATCACCACCACCACCATCACCACCATCACCATCACCACTTTCATCACCACCATCTTTATCACAGTCATTATTACCACCATCAATCATCACCACCTTCATGATCACTGCCACCTGCATCATGGTTACAATTACTACCACCACCATCAGCACCACCTTCATGATCACCACCACCTGCATCATGGCTATAATTACTACCACCATCACCACCACTAACACCACCATCATTATCACCACCATCTTCATGATCACTGCCACCTGCATCATGGCTATAATTACCACCATCATCACCACTATACTACCACCACCATCACCACAACCATCGCTACCACCACCACCACCACCATCACCATCATCACCATCACTACTACTGCCACCACTACCAAAACCACCACCACCACCATCACCACCACCATTGCCACCACTACCATCACCACCATCACCACCATCACCATCACCACCACTACCACCATTACCACCACCACCACCACCACCACCACCATCACCACCACCACCACCATCATCACCACTATC"; + ctgNames = Arrays.asList("asm029759:tig00000","asm029759:tig00001"); + refSegments = Arrays.asList(new SimpleInterval("chr21:36680290-36680331"), new SimpleInterval("chr21:36680331-36680659"), new SimpleInterval("chr21:36680659-36680686")); + altArrangements = Arrays.asList("1","2","1","UINS-249"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Arrays.asList(makeInsertion("chr21", 36680686, 36680686, 250, Allele.create("C", true)).attribute(EVENT_KEY, "CPX_chr21:36680290-36680686").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + // case 4.4: front and rear insertion (coordinate and allele massage, original event CPX_chr19:8888822-8895655) + affectedInterval = new SimpleInterval("chr20:8888822-8895655"); + 
svLen = -6179; + refAllele = "GCTTTTGGGATCAGGGCGGTAGGTGCAGATGGCATCCACTTTGGTGGCTGCCCCATCCTTCTTGGGCCTGGGGAGGGCAAGATTGGGGAAAGACAATACACGGATTGCCTAGGGAGGGGTTCTGAGATCCCACAGGGCAGGACAGGAGGGGGCCAGAGGAAATGAGAAGCCTGGTAGAGAAGTGAGAAGCCATACTTCCATTCTCTGAGTGCACTGACTCTTAGCAACTATGAGCAGAATCATGGATGCAAGGATTTTTATAAAATTGCAAAGTGGAAACTGCATTCTTGTAGCATGGGGTAAGGAGAACTCATACTTGAGACAGGTGAAGAGCTAGGTTTGGGGATGAAAGCAAGTGGACGAGGCATGTTTTTCTTGGGAGAAATCCTGGGAACAGAATTGAACCACTTGTGGAAAGGCAGGTAGGGGAAAGGAGGGAATTCAGAGGGATGACCAAGGTAGAAGCTCAGTCCTGCTGATGGGCTCTGCATCTCTCAGTTGAGGTGGGAGGTGGTGGGAACAGGAAGCACATTGGTCTTAGAGCCACTGCCTCCTGGATTCCACCTGGCTGCGGACATCTCCAGGGAGTGCAGAAGGGAAGCAGGTCAAACTGCTCAGATCAGTCAGACTGGCTGTTCTCAGTTCTCACCTGAGCAAGGTCAGTCTGCAGCCAGAGTACAGAGGGCCAACACTGGTGTTCTTGAACACAGGCCTGAGCTGTGGAGGAGGGAGAGGGAGGTGAGTGGGAGGACTGAAGTCATAGGGGTGTGGTAGAAGGGAGGTGGGCAGGGCTGGCAACAGTTGGTGGGGCTGGCATCAGTGGTTGGGGTATGCATATCTGGGGGCAAGATGAGGTAGATGAGATCTTCCATGCTCTGCAATAGGTGAGGTGGGCAGGACTAAACATGGTGAGTGGGGTAGAATGAGGTATGTGGGGCTCTTACCAGACCCTGAAGGACTCTCTCCGTGGTGTTAAACTTTCTAGAGCCAGGGTGATGCATGTTCTCCTCATACCGCAGGTTAGTGATGGTGAAGTTAATTGTGAATAGCACCAGGAGAGGGCTGGCGGCTATAGTGAAGATGGGAAATAAACAGTGTTCAAAAGTAGACTAACACTTAAGAATTTGTGATAGAATTGTTTCTAAACATATGCAACATGATTTAAAGTGACAAAATAAAAGGTTTTCACGCCTCCCTTCACTGTAATGGTACATAGTTCTCATAGAGCATACTAAATAATAGTAGTTAGATGCTAAGCACAGCTCTTGGTGCATATTCAGTATTCCATACTACTAGACATTATCTACTACCTTTGCCAACACCACGATCATCATCAACATCAGCAACACTGTGACCATCGCTATCATTATTGTCATCACCACCAGCATCAGCACCAAAACCATGACCATCATAATCCCTATGATCCCCACCCATCATCATCATTACACCACCAGCACCACAACCATCATCATCCATATAATCACCACTAGCACATGGTCACTAACACCATTATCATATCATTCTTACTATCATAACCACCATCACCACCAACATCATTATCATCATCACCACCATCATTATCCTCATTGTCAACATCATGATCACATTGTCAATTGCCATCAGAACCATCACATCACCACCACCACTATCATCCTTCCCATAATCATCACCACCACTATCATCATCATCACCATCAAAACCACTACCACCATCATCATCATCATCACCATCATCAGCAACACCATCATTGTTGTCATCATGTCCATCATCCCTATCATTATCATCATCATCACAACAATATAACCTTCCTGAGGATAGAGATTTTTATTTCTTTTGTGTTCCCTGCCTTATCCCTAGGGCCCCGAAGAAGCCAGACGCTCAACAAAATTTGTCGGTTGACTTGTCATCACTAGATTCACTAGGACTTTTCATTTTTATTTGAGTGAAAATGGTGGAGTTGTAGGCCAGGT
GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGGGGCCAAGGCAGACAGATCACGAGGTCAGGAGTTCGAGATCAGTTTGACCAATATGGTGAAGCTCCACCTCTACTAAAAATATAAAAATTAGCTGGGCATGGTGGCATGCACCTGTAATCCCAGCTACTCAGGAGGTGGAGGCAGGAGAATCGCTTGAACCTAGGAGGCAGAGGTTGCAGTGGGCTGAGATCACGCCATTGCATTCTAGCCTGGGTGACAGAGCAAGACTATGTCTCAAAAAAGAAAAAAAAGAAAGAAAAGAAAATGGCAGAGATGTTGACACTCCTCAGGCATGCTGTAATAGAGATGACACTGAATAGTACTCACTCATGGGTCCAGACAGGGAGGCTGGAGTTCTCGAGGTTGCCAGGTGCATTGTGGAGGTATCAGGAGCTGAGGAGAAGGTTTGCTGTATTAGTGAAAGCAGCAAACCCTCTCATGGCAGAAACACCAGCAGGTAGGAGTATTTATGGCCCTTGCTGCATGTGGGGCAGTATCTCCATCTGGAGCTGGGCCAGAGGGAAAGGGGAATAACTTGTGCTCTCTCTTCCTTCCCTCATGAAGAGGATATAGGGGCATAGAAGGAGAGGTAAGCTCCTGCCCTTTATGATGTAGGATCAGGAGCACTGGAACATCAGTTAATACTCACTTCTGGTGGTCGTCATAGAGCTCTGATGGGTGAAACCTGCATTGAGATGGAGGGACGAGAGTGGGTAAGTGTTAAGGGAGTGGTGGGGTACCCTGGTGCAACAGAAGGAATTTCCCAGAACACGGGCCATTGAAGATTCTCTATCAGCACCCTCAGATTTAGAAAGCAGATCCTGAGGGTAAATTCAGTTTGAGATGAAGAAGAAACCACGAAGTTTTGACCTTAGCCACCACATACAGACAACATGGTTGAGCCTTCAATAGCCAGACGGGAAGTGTGACCTCAGGCCAGAGAATAGAGGCTGTCCAGCAAGGAGGAAGAGACTCCAACCACATCAAAGTTGCTCACCATTGACATAGAGACTGTGCCTGTCCAGGGTGTAGGGGCCCAGCTCAGTGATGCCGTGGGTCAGCTGGCTCAGCTTCCAGTACAGCCGCTCTCTGTCCAGTCCAGGGCTTTTGGGGTCAGGACGATGGGTGCAGACAGCATCCACTCTGGTGGCTGCCCCATCCTTCTCAGGCCTAGGGAGGGCAGATGAAGCAAATAACAATTTAAAAATTGCCTCAGGAGGAGTTCTGAGATTTCCTGGGGCCAGACAGGAAGGGGCCAGAGGAAATGAGAAGCCTGGGAAAGAGGTAATAAGTCACATTTAGATTCTCTGAGTGTCCCGGCTGCCAGCCAGCCCAGGACTGTGAACAGAGTCATAGATGCAAGAACTTTTTATAAAATTGCAAAGTAAAGCCTGCATTCTCATGGCATGGAGACAAGAGTACTCAGACATGAGACAGATGAAGAGCTGGAATTGGGGATGAAAGCAAGTGGATGAGGCATGCTTGTCTTGTGAGAAATCCTGGGGACAGAATTTAACAACTGGAGGAAAGGGAGGTGGGTGGAAAGGAGGGAATTCAGAGGGATGACCAAGGTAGAAGCACAATTCTCCTCATGGACTCTGCAGAATCTCGCAGTTGATTGGGGTGGTGCTGATGGTATCAGGAGACAGATTAGTTTTGAAGCCAATGCTTCCCGAATTCCATCTGTTGTGAGGACATTCACAGGGATGGCAGGAAAAAAAAAAACAATCCAAACTCTTTGCGACTAGGAACAATTGGGGCAGCTAGGCTAGCTCTTCTAAAGTCTCACCTGAGCAAGGTCAGTCTGCAACCAGAGTACAGAGAGCTGACACTGGTGTTCTTGAACAAGGGCATAAGCTGTGGAAGAGGGAGAGGGAGGTGACTAGGAGGGCTGGGGATATAGTGGTGGGGCACAAGTGTGGCGAGTGCAGCTGGCACCAGTGGGCAGGGCTTGTGTCTCTAGG
GGAGATGGATGAGATTTTCTTCCATGGCCCTGGCATGGGTGATGTTGTTGGGACCAGGTAACGTGGGAGGGGCAGAGTAAGTTGGACAGGGGTCTCACCAGACCCTGAAGGACCCTCTCCGTGGTGTTGAACTTCCTGGAGCCAGGGCGACGCATGTCCTCCTCATACTGCAGGTTAGTGATGGTAAAATTGAGGGTGAATGGCAGCAGGACAGGGCCAGTGGCTGTAGTAAAAGGTGTGGAAACAAACACTATTGTGTTTGTTTGTATTTTTAATCTGTTGCATATAGTTAAAAATAAGTCTATTGCAAACTCCTCAAGTTTCTCGACTTTTGCAATAGACTTGAAGAGTTTGCAATAAGTTTATTTTTAATCATATGCAACAGAATTTAAAATGAATGACAAAATTAAAGGTTTCCATGCCCTTCTTGATAGAAATTATTTTTTTAAAAAGCATATTACATAATATTTGTTAGATTCTGAGCCCAGCTTTTGGTGCATATTCAGTGCTGAAAAATATCAGATATTACCTGTTACCTTTGTCATCACCATTATCATCATCATCATCATCATCATCACCATTATTGTCAGCATCAGCACCAAGACCATCACCACAACGATAATCCCCATCAATACTACCACAAACATCATTATCACCATAGTCATATCATCATTACCATCATAATCATCATCACTGTCATTATCTGAATAATCATCATCACGATCATTGTCATCATAATCACTATCGCCATCACTATTGTCATCCTCACCATCACGAATGTCACAATCCACATCAACACCACCACAAACATCATCATCATCACCATAATCACATCATCATTAGCATCATAACCAGCAACATTACTGTCATCATCCAAACAACCATCATCATCTTTACCATCATTGTCACCATTGTTATCATCACCACTCCAACATCATCACCACAATCAACACAACCGTCATCATCATCATTATTATTATCATTATTGTCAAAATCCCCATAATTATTGTCAATTGTAATCCCCACCACCACCATCATTCTTCCCATGATTACCACCAGCATAATCATCATTATCCTTACCATCACCACCACCACCACCACCACTGCAATCATCATCACCGCCACCCCCATCATCATCATCATCACAATAATATAAGCTCACTGAGGGCAGTGTTTTTTCTTTTTTTGTTCCCTGATGTATATCCAGGTCCTGATACAGAGGCAGTGCTCAATGAACTGGGTTGGTTGACCTATGGTTACTACATTTACTATGAATTTCCATCTTTAGTTGAGTGAATAATGACAGAGATGTTTACACTCATCAACCATGCTCTAACAGAGTATAAACAGAGATGTTTACACTCATCAACCATGCTCCAAATACCATTGACTGGTATTTACCTGTGGGGCCAGGGAGGGATGATGGAGTCTCAGAGGTTTCCGGCTGTACTGTGAAAGTCCCAGGAGCTGAGGAGAAGCCCTCAACAGTGAAAGGAGCAAGTGCTCTCATGGAAGTAATGCCAGCAGGAGGGACTATTCATGGGGATTTTTGCCTGTGGGGCTTCAGCTGGGATTCTGATTAGCTGGGGCAGTGGGGAAGGGGAATAATTTGTGTCCTCTATGTTCCTTCCCTTATGGAAAGGACATAAGGGCTTATGAATAGGGGTCAGCTCCTGCTAAGTTTGTTGTAAGATTGGGGCCATGGGAACATCAGTAGAATACTCACTGCTGGTGGTGGGCACAGAGCTCCGCTGGGTGAAACCTGCATAGAGAGGGAGGGAGGAGTGTGGATAAGAGTCAAGGGGGAGGTAGGGGGTCAAAAAAAAGTCAGGGCTACCCTGGTGGAATGAAAGGAGTTTCATGGAACAGGGGCCTTTGGAGATTCTCTGTCAGAATCTGAGTATCAGGGAAGAAGCTACTAAGGATGAAATCAGTTCAAGATAAAGAAGAAATCATGAAATTCTGACATCAATCACCACATACAGACGTGTCCTAGCCTT
CAGTGTCCAAGCAGGGCATGTGACCTCAAGTAAGAGAGTAGAGGCTGCCCAGGCAGGAGAAAGAGACTCTAATCACATCAGAGCTGCTTACCATTGACATAGAGACTGTCCCTATCCAGGGTGTAGGGTCCCAGCTCAGTGATGCTGTGGGTCAGCTGGCTCAGCTCCCAGTATAGCTGCTGTCTGTCTAGCCCAGGGATTTTGGGGTCAGGGCGGTGGGTGCAGATGGCGTCCACTCTGGTGGCTACTCCATCCTTCTCAGGCCTGGGGAGAGGTGGTGAGGGGAATGGCAATAAATGAATTGCCTAAGGAGAGGTCTTGAGATGGCAGAGTGTAGGACAAGAAGGACAGAGGAAGTAAAAAGTCTAATACAGAGGTGGGAAGCAATACTTAGATTCCCTGAGTGTACTGGACTGCTTGGGACTGTGAGCATAGTTGTGGATATAACAGTTTTCCCAAAATGGCAGGGTGGATCCTGCATTCTTAAAGCATGGAAACAGGAGAGCTCAGACTTGAGATAGCTGAAGAGCTGGAATTGGGGATGAAAACAAGTGGTTGAGGCATGTTTGTCTTGAGAGAAATCCTGGTTACAGAGTTTAACACCTAAAGTCAAGGGAGGTGGGGGGAAAACAGGGAATTCTGAGGAATGGCCAGGGCAGAAGTCCAGTCCTGTTGATGGACTCTGGAATCTCTCAGTCAGTTTGGAAGTGGGGGCGGGGGCACTGCTAATACCAGGAGACAGGTTAGTCCTGGAGCCAGTTTCCTGGATTCCATCTTTGATAAGAATAATCACAAAGAGGACAGGAAAAAAGAAGTCCAAACTCATTGAGACTAGGAATAATCGGGGCAGCCACACTGGAAATT"; + altSeq = "GTTTAGAGGGTCAAGGCGGTGAGTGCAGATGGTGTCCACGCCGGTGGCTGCCCCACGTTTTTCAGGCCTGAGGAGGATAAGTGAGGGGAATGACAATAAGTGGATTGCCTAGGAAGAGGTCCTGAGATTCCTGAGGGAAGGACAGGCAGGGGCCAGAGGAAGTGGTACAGAGGTGAGAAGCTACACATAGATTCTGTGAGTGCACCAACTCCCAGCCAGCCTGGGTCTATGAGCAGAATCATGGACACAAAAATATTTGTAACATTATAAAGTGGATCCTCTGTTCTCAGAAGATGGGGACAGGAGAGCTCAGACTTGAGACAGGTGAAGAGCTGGAATTGGGGATCAAAGCAAGTGGATGAGGCATGTTTGTGTTTGAAAGAAATGCTGGTTGTAGAGTTTATTATCTGCAGGAAAGGGAGGTGGTGGGAAAGGAGGGAACTTAGAGGGATGACCAGGGTAGGGGCCCTATCCTGCTGATGAGCTCTGCAGCATCACTGTTGAGTGGAGAGATGTTACTATCAGGGACACTTTGGTCCTGGAGCCACTGCCTCCTGGATTCCACCAGTGCTGAGGGCACCCCTAGGGAGGGCAGGAAGGAAGTTTTCCAAACTTCTTGGGACTGGGAACAATTGGAGAAGGTAGGCTGGCTCCT"; + ctgNames = Arrays.asList("asm026939:tig00007"); + refSegments = Arrays.asList(new SimpleInterval("chr20:8888822-8895288"), new SimpleInterval("chr20:8895288-8895361"), new SimpleInterval("chr20:8895361-8895655")); + altArrangements = Arrays.asList("UINS-297","2","UINS-280"); + complex = makeTestComplexVariant(affectedInterval, svLen, refAllele, altSeq, ctgNames, refSegments, altArrangements); + expectedSimple = Arrays.asList(makeDeletion(new 
SimpleInterval("chr20:8888822-8895287"), Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr20:8888822-8895655").attribute(CONTIG_NAMES, ctgNames).make(), + makeDeletion(new SimpleInterval("chr20:8895361-8895654"), Allele.create("T", true)).attribute(EVENT_KEY, "CPX_chr20:8888822-8895655").attribute(CONTIG_NAMES, ctgNames).make(), + makeInsertion("chr20", 8888822, 8888822, 298, Allele.create("G", true)).attribute(EVENT_KEY, "CPX_chr20:8888822-8895655").attribute(CONTIG_NAMES, ctgNames).make(), + makeInsertion("chr20", 8895655, 8895655, 281, Allele.create("T", true)).attribute(EVENT_KEY, "CPX_chr20:8888822-8895655").attribute(CONTIG_NAMES, ctgNames).make()); + data.add(new Object[]{complex, b38_reference_chr20_chr21, multiSegmentsCpxVariantExtractor, expectedSimple}); + + return data; + } + @DataProvider(name = "forTestSegmentedCpxVariantExtractor") + private Object[][] forTestSegmentedCpxVariantExtractor() { + final List data = caseForZeroAndOneSegmentCalls(); + data.addAll(caseForMultiSegmentsCalls()); + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestSegmentedCpxVariantExtractor") + public void testSegmentedCpxVariantExtractor(final VariantContext complexVC, final ReferenceMultiSource reference, + final SegmentedCpxVariantSimpleVariantExtractor worker, + final List expected) { + assertVariantsAreEqual(worker.extract(complexVC, reference), expected, Collections.emptyList(), b38_seqDict_chr20_chr21); + } + + @DataProvider(name = "forTestMultiSegmentsCpxVariantExtractorFindAllSegments") + private Object[][] forTestMultiSegmentsCpxVariantExtractorFindAllSegments() { + final List data = new ArrayList<>(20); + + List altArrangement = Arrays.asList("UINS-58", "1", "2", "2"); + data.add(new Object[]{altArrangement, 2, 1}); + data.add(new Object[]{altArrangement, 3, -1}); + + altArrangement = Arrays.asList("UINS-58","1","2","3","1","chrX:10000-10200","1","3","3","UINS-15"); + data.add(new Object[]{altArrangement, 3, 
1}); + altArrangement = Arrays.asList("UINS-58","1","2","3","1","chrX:10000-10200","1","2","3","UINS-15"); + data.add(new Object[]{altArrangement, 3, 6}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestMultiSegmentsCpxVariantExtractorFindAllSegments") + public void testMultiSegmentsCpxVariantExtractorFindAllSegments(final List altArrangement, final int segmentCount, + final int expectedIdx) { + Assert.assertEquals(findAllSegments(altArrangement, segmentCount), + expectedIdx); + } + + @DataProvider(name = "forTestMultiSegmentsCpxVariantExtractorCompactifyMissingSegments") + private Object[][] forTestMultiSegmentsCpxVariantExtractorCompactifyMissingSegments() { + final List data = new ArrayList<>(20); + + data.add(new Object[]{Sets.newHashSet(new SimpleInterval("chr1:10000-10010"), new SimpleInterval("chr1:10012-10020")), + Arrays.asList(new SimpleInterval("chr1:10000-10010"), new SimpleInterval("chr1:10012-10020"))}); + + data.add(new Object[]{Sets.newHashSet(new SimpleInterval("chr1:10000-10010"), new SimpleInterval("chr1:10011-10020")), + Arrays.asList(new SimpleInterval("chr1:10000-10020"))}); + + data.add(new Object[]{Sets.newHashSet(new SimpleInterval("chr1:10000-10010"), new SimpleInterval("chr1:10010-10020")), + Arrays.asList(new SimpleInterval("chr1:10000-10020"))}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestMultiSegmentsCpxVariantExtractorCompactifyMissingSegments") + public void testMultiSegmentsCpxVariantExtractorCompactifyMissingSegments(final Set missingSegments, + final List expected) { + Assert.assertEquals(compactifyMissingSegments(missingSegments), + expected); + } + + //================================================================================================================== + + @DataProvider(name = "forTestIsConsistentWithCPX") + private Object[][] forTestIsConsistentWithCPX() { + final List data = new ArrayList<>(20); + + 
List refSegments = Arrays.asList(new SimpleInterval("chr1:4939507-4939535"), new SimpleInterval("chr1:4939535-4939614")); + List altArrangements = Arrays.asList("UINS-58", "1", "2", "-2"); + VariantContext complex = makeTestComplexVariant(new SimpleInterval("chr1:4939506-4939614"), 109, "ATATATAT", "TTTTTTTTTTTTTTTTTTTTT", Collections.singletonList("dummy"), + refSegments, altArrangements); + RelevantAttributes relevantAttributes = new RelevantAttributes(complex); + data.add(new Object[]{makeInversion(new SimpleInterval("chr1:4939535-4939614"), Allele.create("A", true)).make(), relevantAttributes, + false}); + data.add(new Object[]{makeInsertion("chr1", 4939507, 4939507, 59, Allele.create("A", true)).make(), relevantAttributes, true}); + data.add(new Object[]{makeDeletion(new SimpleInterval("chr1:4939535-4939614"), Allele.create("A", true)).make(), relevantAttributes, false}); + + complex = makeTestComplexVariant(new SimpleInterval("chr6:857170-857852"), -477, + "CTCTCTTCAGAGGAAATAAATTAAAATATACTAATTGTGTTAGAAAAGCCTAAACCTTAAAATTCAATATAATTGTGGTCAAATAATGCAGATTATGAAATGTGCATGTGAGAGTCCTAGCTCAAGGAGAAGTCGTGCCTCAGTCACTGCCACTCGTCCACCCATCCATCCTCCTATCCGCTCATTTATCCATCCATCCACCTATCCATCCATCCATCCATCCATCCATCCATCCATCCATCCTCCTATCCTCTCATTTATCCATCCATCCACCTATCCACCCATCCATCCATCCTCCTATCCTCTCATTTATCCATCCATCCACCTATCCATTCCTCCATCCATCCTCCTCTCATTTATCCATCCATCCACCTATCCATCCATCCATCCATCCATCCATCCATCCATCCATCCATCCATCCTCCTGTCCTCTCATTTATCCATCCATCCACCTATCCACCCATCCATCCATCCTCCTATCCTCTCATTTATCCATCCATCCACCTAGTCACCCATCCATCCATCCTCTTATCCTCTCATTTATCCATCCATCCACCTATCCGTCCATCCATCCTCCTATCCTCTCATTTATCCATCCATCCACCTATCAATCCATCCATCCATCCACCTATCAATCCATCCATCCATCCTCCTATCCTCTCATTTATCCATCGATCCACCTA", + "TTTTTTTTTTTTTTTTTTTTT",Collections.singletonList("asm009963:tig00000"), + Arrays.asList(new SimpleInterval("chr6:857170-857564"), new SimpleInterval("chr6:857564-857767"), new SimpleInterval("chr6:857767-857852")), Arrays.asList("2")); + relevantAttributes = new RelevantAttributes(complex); + data.add(new Object[]{makeDeletion(new 
SimpleInterval("chr6:857169-857562"), Allele.create("G", true)).make(), relevantAttributes, true}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestIsConsistentWithCPX") + public void testIsConsistentWithCPX(final VariantContext simpleVariant, final RelevantAttributes complexVarAttributes, + final boolean expected) { + Assert.assertEquals(isConsistentWithCPX(simpleVariant, complexVarAttributes), + expected); + } + + @DataProvider(name = "forTestDeletionConsistencyCheck") + private Object[][] forTestDeletionConsistencyCheck() { + final List data = new ArrayList<>(20); + + VariantContext del = makeDeletion(new SimpleInterval("chr6:857169-857562"), Allele.create("G", true)).make(); + HashSet missingSegments = Sets.newHashSet(new SimpleInterval("chr6:857170-857564"), new SimpleInterval("chr6:857767-857852")); + + data.add(new Object[]{del, Collections.emptySet(), false}); + data.add(new Object[]{del, missingSegments, true}); + + data.add(new Object[]{del, Collections.singleton(new SimpleInterval("chr6:857767-857852")), false}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestDeletionConsistencyCheck") + public void testDeletionConsistencyCheck(final VariantContext simple, final Set missingSegments, + final boolean expected) { + Assert.assertEquals(deletionConsistencyCheck(simple, missingSegments), + expected); + } + + @DataProvider(name = "forPostProcessConvertShortDupToIns") + private Object[][] forPostProcessConvertShortDupToIns() { + final List data = new ArrayList<>(20); + + // inversion, no effect + VariantContext var = makeInversion(new SimpleInterval("chr1:10001-10100"), Allele.create("A", true)).make(); + data.add(new Object[]{var, var}); + + // deletion, no effect + var = makeDeletion(new SimpleInterval("chr1:10001-10100"), Allele.create("A", true)).make(); + data.add(new Object[]{var, var}); + + // insertion, no effect + var = makeInsertion("chr1", 
11111, 11111, 100, Allele.create("A", true)).make(); + data.add(new Object[]{var, var}); + + // dup large enough + var = new VariantContextBuilder().chr("chr2").start(241987322).stop(241987322) + .id("INS-DUPLICATION-TANDEM-EXPANSION_chr2_241987322_241987322_CPX_DERIVED") + .alleles(Arrays.asList(Allele.create("T", true), Allele.create(""))) + .attribute(VCFConstants.END_KEY, 241987322) + .attribute(CONTIG_NAMES, "asm004634:tig00000") + .attribute(ALIGN_LENGTHS, 1125) + .attribute(HQ_MAPPINGS, 1) + .attribute(MAPPING_QUALITIES, 60) + .attribute(TOTAL_MAPPINGS,1) + .attribute(MAX_ALIGN_LENGTH, 1125) + .attribute(INSERTED_SEQUENCE, "AGTGGATGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCAGAGATCCAGCTGATGGCATTCAGCGTACTCGGAGATCCAGTTGATGGTGTTGGGTGTTCTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCAGAGATCCAGTTGATGGCATTCAGTGTACTCGGAGATCTAGTCGATGGC") + .attribute(INSERTED_SEQUENCE_LENGTH, 196) + .attribute(INSERTED_SEQUENCE_MAPPINGS, "1102_1242_chr2:241987361-241987501_-_1101H141M1284H_60_8_101_O,1176_1373_chr2:241987323-241987520_-_1175H198M1153H_60_12_138_O") + .attribute(SEQ_ALT_HAPLOTYPE, "GTTGGGTGTACTCGGAGATCCGGTCGATGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCGGAGATCCAGTGGATGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCGGAGATCCAGTGGATGGCGTTGGGTGTACTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCAGAGATCCAGCTGATGGCATTCAGCGTACTCGGAGATCCAGTTGATGGTGTTGGGTGTTCTCGGAGATCCAGTCGGTGGCGTTGGGTGTACTCAGAGATCCAGTTGATGGCATTCAGTGTACTCGGAGATCTAGTCGATGGCGTTGGGTGTACTCGGAGATCCAGTTGATGGCATTCAGCGTACTCGGAGATCCAGTTGATGGTGTTGGGTGTTCTCGGAGATCCAGTCGGTGGTGTTGGGTGTACTCAGAGATCCAGTTGATGGCATTCATTGTACTCGGAGATCCAGTCGATGGCGTTGGGTGTACTTGGAGATCCAGTCGGTGGCGTTGGGTGTACTTGGAGATCC") + .attribute(SVLEN, 403) + .attribute(SVTYPE, "DUP") + .attribute(EVENT_KEY, "CPX_chr2:241987323-241987529") + .attribute(DUPLICATION_NUMBERS, "1,2") + .attribute(DUP_ORIENTATIONS, "++") + .attribute(DUP_REPEAT_UNIT_REF_SPAN, "chr2:241987323-241987529") + .attribute(DUP_SEQ_CIGARS,"207M,207M") + 
.attribute(DUP_TAN_EXPANSION_STRING, "") + .make(); + data.add(new Object[]{var, var}); + + // dup not large enough + var = new VariantContextBuilder().chr("chr2").start(83340906).stop(83340906) + .id("INS-DUPLICATION-TANDEM-EXPANSION_chr2_83340906_83340906_CPX_DERIVED") + .alleles(Arrays.asList(Allele.create("T", true), Allele.create(""))) + .attribute(VCFConstants.END_KEY, 83340906) + .attribute(CONTIG_NAMES, "asm003204:tig00000") + .attribute(ALIGN_LENGTHS, 58) + .attribute(HQ_MAPPINGS, 1) + .attribute(MAPPING_QUALITIES, 60) + .attribute(TOTAL_MAPPINGS,1) + .attribute(MAX_ALIGN_LENGTH, 58) + .attribute(INSERTED_SEQUENCE, "TTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAA") + .attribute(INSERTED_SEQUENCE_LENGTH, 60) + .attribute(INSERTED_SEQUENCE_MAPPINGS, "1280_1328_chr2:83340902-83340950_+_1279H49M62H_60_0_49_O") + .attribute(SEQ_ALT_HAPLOTYPE, "TATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATATTATAATATATTTTAATATTATATTATATTATAATATATT") + .attribute(SVLEN, 104) + .attribute(SVTYPE, "DUP") + .attribute(EVENT_KEY, "CPX_chr2:83340902-83340950") + .attribute(DUPLICATION_NUMBERS, "1,2") + .attribute(DUP_ORIENTATIONS, "+-") + .attribute(DUP_REPEAT_UNIT_REF_SPAN, "chr2:83340907-83340950") + .attribute(DUP_SEQ_CIGARS,"44M,44M") + .attribute(DUP_TAN_EXPANSION_STRING, "") + .make(); + final VariantContext insertion = new VariantContextBuilder().chr("chr2").start(83340906).stop(83340906) + .id("INS-DUPLICATION-TANDEM-EXPANSION_chr2_83340906_83340906_CPX_DERIVED") + .alleles(Arrays.asList(Allele.create("T", true), Allele.create(""))) + .attribute(VCFConstants.END_KEY, 83340906) + .attribute(CONTIG_NAMES, "asm003204:tig00000") + .attribute(ALIGN_LENGTHS, 58) + .attribute(HQ_MAPPINGS, 1) + .attribute(MAPPING_QUALITIES, 60) + .attribute(TOTAL_MAPPINGS,1) + .attribute(MAX_ALIGN_LENGTH, 58) + .attribute(INSERTED_SEQUENCE, "TTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAA") + 
.attribute(INSERTED_SEQUENCE_LENGTH, 60) + .attribute(INSERTED_SEQUENCE_MAPPINGS, "1280_1328_chr2:83340902-83340950_+_1279H49M62H_60_0_49_O") + .attribute(SEQ_ALT_HAPLOTYPE, "TATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATTATATTATATTATAATATATTTTAATATATTATAATATATTTTAATATTATATTATATTATAATATATT") + .attribute(SVLEN, 104) + .attribute(SVTYPE, "INS") + .attribute(EVENT_KEY, "CPX_chr2:83340902-83340950") + .attribute(DUPLICATION_NUMBERS, "1,2") + .attribute(DUP_ORIENTATIONS, "+-") + .attribute(DUP_REPEAT_UNIT_REF_SPAN, "chr2:83340907-83340950") + .attribute(DUP_SEQ_CIGARS,"44M,44M") + .attribute(DUP_TAN_EXPANSION_STRING, "") + .make(); + data.add(new Object[]{var, insertion}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forPostProcessConvertShortDupToIns") + public void testPostProcessConvertShortDupToIns(final VariantContext simpleVariant, + final VariantContext expected) { + VariantContextTestUtils.assertVariantContextsAreEqual(postProcessConvertShortDupToIns(simpleVariant), expected, Collections.emptyList()); + } + + @DataProvider(name = "forPostProcessConvertReplacementToFatInsOrInsAndDel") + private Object[][] forPostProcessConvertReplacementToFatInsOrInsAndDel() { + final List data = new ArrayList<>(20); + + // rare but possible: no variant would be emitted + data.add(new Object[]{new VariantContextBuilder().chr("chr9").start(108455205).stop(108455252).alleles(Arrays.asList(Allele.create("T", true), Allele.create(""))).attribute(VCFConstants.END_KEY, 108455252).attribute(SVTYPE, "DEL").attribute(SVLEN, -47).attribute(INSERTED_SEQUENCE_LENGTH, 7).attribute(INSERTED_SEQUENCE, "ATCTTAT").make(), + Collections.emptyList() + }); + + // fat insertion + VariantContext deletion = makeDeletion(new SimpleInterval("chr21:23428920-23428967"), Allele.create("T", true)).attribute(ALIGN_LENGTHS, "56,56").attribute(EVENT_KEY, "CPX_chr21:23428920-23429023").attribute(CONTIG_NAMES, 
"asm029052:tig00000,asm029052:tig00001").attribute(HQ_MAPPINGS, 2).attribute(INSERTED_SEQUENCE, "ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA").attribute(INSERTED_SEQUENCE_LENGTH, 85).attribute(INSERTED_SEQUENCE_MAPPINGS, "1330_1385_chr21:23428968-23429023_+_1329H56M1200H_49_4_36_O,1330_1385_chr21:23428968-23429023_+_1329H56M1200H_49_4_36_O").attribute(MAPPING_QUALITIES, "60,60").attribute(MAX_ALIGN_LENGTH, 56).attribute(SEQ_ALT_HAPLOTYPE, "ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA").attribute(TOTAL_MAPPINGS, 2).make(); + VariantContext fatInsertion = makeInsertion("chr21", 23428920, 23428967, 85, Allele.create("TTTATATAAATATATATAAATATATAATATATAATAATATAATATAAT", true)).attribute(ALIGN_LENGTHS, "56,56").attribute(EVENT_KEY, "CPX_chr21:23428920-23429023").attribute(CONTIG_NAMES, "asm029052:tig00000,asm029052:tig00001").attribute(HQ_MAPPINGS, 2).attribute(INSERTED_SEQUENCE, "ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA").attribute(INSERTED_SEQUENCE_LENGTH, 85).attribute(MAPPING_QUALITIES, "60,60").attribute(MAX_ALIGN_LENGTH, 56).attribute(SEQ_ALT_HAPLOTYPE, "ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA").attribute(TOTAL_MAPPINGS, 2).make(); + data.add(new Object[]{deletion, + Collections.singletonList(fatInsertion) + }); + + // deletion with small insertion, i.e. 
no modification + VariantContext deletionWithMicroInsertion = new VariantContextBuilder().chr("chr20").start(63093346).stop(63094245).alleles(Arrays.asList(Allele.create("G", true), Allele.create(""))).attribute(VCFConstants.END_KEY, 63094245).attribute(SVTYPE, "DEL").attribute(SVLEN, -899).attribute(INSERTED_SEQUENCE_LENGTH, 1).attribute(INSERTED_SEQUENCE, "T").attribute(ALIGN_LENGTHS,942).attribute(EVENT_KEY, "CPX_chr20:63092255-63094246").attribute(CONTIG_NAMES, "asm028762:tig00002").attribute(HQ_MAPPINGS, 1).attribute(MAPPING_QUALITIES, "60").attribute(MAX_ALIGN_LENGTH,942).attribute(SEQ_ALT_HAPLOTYPE, "T").attribute(TOTAL_MAPPINGS, 1).make(); + data.add(new Object[]{deletionWithMicroInsertion, + Collections.singletonList(deletionWithMicroInsertion) + }); + + // deletion and insertion at the same time (location massaging) + VariantContext sourceDeletion = makeDeletion(new SimpleInterval("chr20:440444-440697"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chrX:439692-440698").attribute(CONTIG_NAMES, "asm030101:tig00001").attribute(TOTAL_MAPPINGS, 1).attribute(MAPPING_QUALITIES, 60).attribute(HQ_MAPPINGS, 1).attribute(ALIGN_LENGTHS, 170).attribute(MAX_ALIGN_LENGTH, 170).attribute(INSERTED_SEQUENCE_LENGTH, 60).attribute(INSERTED_SEQUENCE, "TTCATACACACACAGATACACACCCGCGCACACACAGATGCACACACACACCCGTACACT").attribute(SEQ_ALT_HAPLOTYPE, "TTCATACACACACAGATACACACCCGCGCACACACAGATGCACACACACACCCGTACACT").make(); + VariantContext linkedDel = makeDeletion(new SimpleInterval("chr20:440444-440697"), Allele.create("A", true)).attribute(LINK, "INS_chr20_440444_440444_CPX_DERIVED").attribute(EVENT_KEY, "CPX_chrX:439692-440698").attribute(CONTIG_NAMES, "asm030101:tig00001").attribute(TOTAL_MAPPINGS, 1).attribute(MAPPING_QUALITIES, 60).attribute(HQ_MAPPINGS, 1).attribute(ALIGN_LENGTHS, 170).attribute(MAX_ALIGN_LENGTH, 170).make(); + VariantContext linkedIns = makeInsertion("chr20", 440444, 440444, 60, Allele.create("A", true)).attribute(LINK, 
"DEL_chr20_440444_440697_CPX_DERIVED").attribute(EVENT_KEY, "CPX_chrX:439692-440698").attribute(CONTIG_NAMES, "asm030101:tig00001").attribute(TOTAL_MAPPINGS, 1).attribute(MAPPING_QUALITIES, 60).attribute(HQ_MAPPINGS, 1).attribute(ALIGN_LENGTHS, 170).attribute(MAX_ALIGN_LENGTH, 170).attribute(INSERTED_SEQUENCE_LENGTH, 60).attribute(INSERTED_SEQUENCE_LENGTH, 60).attribute(INSERTED_SEQUENCE, "TTCATACACACACAGATACACACCCGCGCACACACAGATGCACACACACACCCGTACACT").attribute(SEQ_ALT_HAPLOTYPE, "TTCATACACACACAGATACACACCCGCGCACACACAGATGCACACACACACCCGTACACT").make(); + data.add(new Object[]{sourceDeletion, + Arrays.asList(linkedIns, linkedDel) + }); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forPostProcessConvertReplacementToFatInsOrInsAndDel") + public void testPostProcessConvertReplacementToFatInsOrInsAndDel(final VariantContext simpleVariant, + final List expected) { + assertVariantsAreEqual(postProcessConvertReplacementToFatInsOrInsAndDel(simpleVariant, b38_reference_chr20_chr21).collect(Collectors.toList()), + expected, Collections.emptyList(), b38_seqDict_chr20_chr21); + } + + @DataProvider(name = "forTestRemoveDuplicates") + private Object[][] forTestRemoveDuplicates() { + final List data = new ArrayList<>(20); + + final List sourceWithLessAnnotations = new ArrayList<>(); + final List sourceWithMoreAnnotations = new ArrayList<>(); + final List expected = new ArrayList<>(); + + final VariantContext firstInsertion = makeInsertion("chr21", 46069065, 46069065, 60, Allele.create("C", true)).attribute(EVENT_KEY, "CPX_chr21:46069065-46069209").attribute(CONTIG_NAMES, "asm029362:tig00001,asm029362:tig00002").make(); + final VariantContext firstInsertionWithMoreAnnotations = makeInsertion("chr21", 46069065, 46069065, 60, Allele.create("C", true)).attribute(EVENT_KEY, "CPX_chr21:46069065-46069209").attribute(CONTIG_NAMES, "asm029362:tig00001,asm029362:tig00002") + .attribute(TOTAL_MAPPINGS, 2).attribute(MAPPING_QUALITIES, 
"60,60").attribute(HQ_MAPPINGS, "2").attribute(ALIGN_LENGTHS, "91,91").attribute(MAX_ALIGN_LENGTH, "91").attribute(INSERTED_SEQUENCE, "CTAGGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGCACACGTGTGT").attribute(INSERTED_SEQUENCE_LENGTH, 60).attribute(SEQ_ALT_HAPLOTYPE, "CTAGGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGCACACGTGTGT").make(); + sourceWithLessAnnotations.add(firstInsertion); + sourceWithMoreAnnotations.add(firstInsertionWithMoreAnnotations); + expected.add(firstInsertionWithMoreAnnotations); + + final VariantContext firstDeletion = makeDeletion(new SimpleInterval("chr21:46069156-46069208"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr21:46069065-46069209").attribute(CONTIG_NAMES, "asm029362:tig00001,asm029362:tig00002").make(); + final VariantContext firstDeletionWitMoreAnnotations = makeDeletion(new SimpleInterval("chr21:46069156-46069208"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr21:46069065-46069209").attribute(CONTIG_NAMES, "asm029362:tig00001,asm029362:tig00002") + .attribute(TOTAL_MAPPINGS, 2).attribute(MAPPING_QUALITIES, "60,60").attribute(HQ_MAPPINGS, "2").attribute(ALIGN_LENGTHS, "91,91").attribute(MAX_ALIGN_LENGTH, "91").make(); + sourceWithLessAnnotations.add(firstDeletion); + sourceWithMoreAnnotations.add(firstDeletionWitMoreAnnotations); + expected.add(firstDeletionWitMoreAnnotations); + + // locations below seems inconsistent with the annotations, but that's because we artificially put data on chr20 and 21 + final VariantContext insertionFromStringParsing = makeInsertion("chr20", 439692, 439692, 130, Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chrX:439692-440698").attribute(CONTIG_NAMES, "asm030101:tig00001").make(); + final VariantContext deletionFromStringParsing = makeDeletion(new SimpleInterval("chr20:439692-440161"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chrX:439692-440698").attribute(CONTIG_NAMES, "asm030101:tig00001").make(); + sourceWithLessAnnotations.add(insertionFromStringParsing); + 
expected.add(insertionFromStringParsing); + sourceWithLessAnnotations.add(deletionFromStringParsing); + expected.add(deletionFromStringParsing); + + final VariantContext inversionFromStringParsing = makeInversion(new SimpleInterval("chr21:187497346-187497595"), Allele.create("A", true)).attribute(EVENT_KEY, "CPX_chr1:187495696-187497598").attribute(CONTIG_NAMES, "asm001762:tig00000,asm001762:tig00001,asm001763:tig00000").make(); + sourceWithLessAnnotations.add(inversionFromStringParsing); + expected.add(inversionFromStringParsing); + + final VariantContext tandemDuplicationFromPairIteration = new VariantContextBuilder() + .chr("chr20").start(56839685).stop(56839685) + .id("INS-DUPLICATION-TANDEM-EXPANSION_chrY_56839685_56839685_CPX_DERIVED") + .alleles(Arrays.asList(Allele.create("A", true), Allele.create(""))) + .attribute(VCFConstants.END_KEY, 56839685) + .attribute(TOTAL_MAPPINGS, 1) + .attribute(MAPPING_QUALITIES, "60") + .attribute(HQ_MAPPINGS, 1) + .attribute(ALIGN_LENGTHS, 770) + .attribute(MAX_ALIGN_LENGTH, 770) + .attribute(INSERTED_SEQUENCE, "CTGTTGACTAGTCTTTGCCTACAGAGGGCGTTGTGACATATCTCTGCACTGATCTCTCAGGTGAGGTAACTTCTCTAGTCTCTGCCTACAGAGGG") + .attribute(INSERTED_SEQUENCE_LENGTH, 95) + .attribute(DUP_REPEAT_UNIT_REF_SPAN, "chrY:56839686-56839794") + .attribute(DUPLICATION_NUMBERS, "1,2") + .attribute(DUP_ORIENTATIONS, "++") + .attribute(DUP_SEQ_CIGARS, "109M,109M") + .attribute(DUP_TAN_EXPANSION_STRING, "") + .attribute(SEQ_ALT_HAPLOTYPE, "CATTGTGACTTATCTCTGCACTGATCACCCAGGTGATGTAACTCTTGTCTAGGCTCTGGCCACAGGGACATAGTGACATATATCTGCACTGATCACACAGGTAATGTAACTGTTGACTAGTCTTTGCCTACAGAGGGCGTTGTGACATATCTCTGCACTGATCTCTCAGGTGAGGTAACTTCTCTAGTCTCTGCCTACAGAGGGCATTGTGACATCACTCTGCAATGATCACCCAGGTGATGTAACTCTTGTCTAGGCTCTGCCTACATGGACATTGTGACATGTCTCTGCACTGATCACCCAGGTGATGTAA") + .attribute(EVENT_KEY, "CPX_chrY:56838784-56839794") + .attribute(CONTIG_NAMES, "asm031346:tig00004") + .make(); + sourceWithMoreAnnotations.add(tandemDuplicationFromPairIteration); + 
expected.add(tandemDuplicationFromPairIteration); + + data.add(new Object[]{sourceWithLessAnnotations, sourceWithMoreAnnotations, expected}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestRemoveDuplicates") + public void testRemoveDuplicates(final List sourceWithLessAnnotations, final List sourceWithMoreAnnotations, + final List expected) { + assertVariantsAreEqual(removeDuplicates(sourceWithLessAnnotations, sourceWithMoreAnnotations), expected, + Collections.emptyList(), CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict); + } + + //================================================================================================================== + + @DataProvider(name = "forTestGetInsFromOneEnd") + private Object[][] forTestGetInsFromOneEnd() { + final List data = new ArrayList<>(20); + + final SimpleInterval dummyInsertionPos = new SimpleInterval("chr1", 100, 100); + final Allele dummyRefAllele = Allele.create("A", true); + final List refSegmentLengths = Arrays.asList(30, 40, 50, 10); + + data.add(new Object[]{true, 4, dummyInsertionPos, dummyRefAllele, refSegmentLengths, Arrays.asList("1","2","3","4","1","2","3","4"), true, + makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 131, dummyRefAllele).make()}); + data.add(new Object[]{false, 7, dummyInsertionPos, dummyRefAllele, refSegmentLengths, Arrays.asList("1","2","3","4","1","2","3","4"), true, + null}); + + data.add(new Object[]{true, 2, dummyInsertionPos, dummyRefAllele, refSegmentLengths, Arrays.asList(UNMAPPED_INSERTION + "-50", "-chrX:10001-10009", "1","2","3","3","1","4"), true, + makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 60, dummyRefAllele).make()}); + data.add(new Object[]{false, 5, dummyInsertionPos, dummyRefAllele, refSegmentLengths, Arrays.asList("1","2","3","3","1","4", UNMAPPED_INSERTION + "-49"), true, + 
makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 50, dummyRefAllele).make()}); + data.add(new Object[]{false, 5, dummyInsertionPos, dummyRefAllele, refSegmentLengths, Arrays.asList("1","2","3","3","1","4", UNMAPPED_INSERTION + "-49"), false, + null}); + + data.add(new Object[]{true, 1, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList(UNMAPPED_INSERTION + "-100", "1"), true, + makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 101, dummyRefAllele).make()}); + data.add(new Object[]{false, 0, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList("1", UNMAPPED_INSERTION + "-100"), true, + makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 101, dummyRefAllele).make()}); + data.add(new Object[]{true, 2, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList("1", "-chrX:10001-10039", "1"), true, + makeInsertion(dummyInsertionPos.getContig(), dummyInsertionPos.getStart(), dummyInsertionPos.getEnd(), 60, dummyRefAllele).make()}); + data.add(new Object[]{true, 2, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList("1", "-chrX:10001-10009", "1"), true, + null}); + data.add(new Object[]{false, 2, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList("1", "-chrX:10001-10039", "1"), true, + null}); + data.add(new Object[]{false, 2, dummyInsertionPos, dummyRefAllele, Collections.singletonList(20), Arrays.asList("1", "-chrX:10001-10009", "1"), true, + null}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestGetInsFromOneEnd") + public void testGetInsFromOneEnd(final boolean fromFront, final int idxFirstMatch, + final SimpleInterval insertionStartAndStop, final Allele anchorBaseRefAllele, + final List refSegmentLengths, final List 
altArrangement, + final boolean shouldIncreaseInsLenByOne, + final VariantContext expected) { + if (expected != null) { + VariantContextTestUtils.assertVariantContextsAreEqual(getInsFromOneEnd(fromFront, idxFirstMatch, insertionStartAndStop, anchorBaseRefAllele, refSegmentLengths, altArrangement, shouldIncreaseInsLenByOne).make(), + expected, Collections.emptyList()); + } else { + Assert.assertNull(getInsFromOneEnd(fromFront, idxFirstMatch, insertionStartAndStop, anchorBaseRefAllele, refSegmentLengths, altArrangement, shouldIncreaseInsLenByOne)); + } + } + + @DataProvider(name = "forTestGetInsLen") + private Object[][] forTestGetInsLen() { + final List data = new ArrayList<>(20); + + data.add(new Object[]{UNMAPPED_INSERTION + "-12", Collections.emptyList(), 12}); + + data.add(new Object[]{UNMAPPED_INSERTION + "-12", Collections.singletonList(89), 12}); + + data.add(new Object[]{"3", Arrays.asList(30, 40, 50, 10), 50}); + + data.add(new Object[]{"-4", Arrays.asList(30, 40, 50, 10), 10}); + + data.add(new Object[]{"chr1:1000-1000", Arrays.asList(30, 40, 50, 10), 1}); + + data.add(new Object[]{"chr1:1000-1001", Arrays.asList(30, 40, 50, 10), 2}); + + data.add(new Object[]{"-chr1:1001-1050", Arrays.asList(30, 40, 50, 10), 50}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestGetInsLen") + public void testGetInsLen(final String description, final List refSegmentLengths, + final int expected) { + Assert.assertEquals(getInsLen(description, refSegmentLengths), + expected); + } + + @DataProvider(name = "forGetMissingAndPresentAndInvertedSegments") + private Object[][] forGetMissingAndPresentAndInvertedSegments() { + final List data = new ArrayList<>(20); + + data.add(new Object[]{Arrays.asList(new SimpleInterval("chr1:1001-1100")), Arrays.asList("-1"), + new Tuple3<>(Collections.emptySet(), Collections.emptySet(), Arrays.asList(1))}); + + final List refSegments = Arrays.asList(new SimpleInterval("chr1:1001-1100"), new 
SimpleInterval("chr1:1100-1200"), new SimpleInterval("chr1:1200-1300"), new SimpleInterval("chr1:1300-1400")); + data.add(new Object[]{refSegments, Arrays.asList("1","3","4"), + new Tuple3<>(Sets.newHashSet(new SimpleInterval("chr1:1100-1200")), new TreeSet<>(Sets.newHashSet(1,3,4)), Collections.emptyList())}); + + data.add(new Object[]{refSegments, Arrays.asList("1","4","2","3"), + new Tuple3<>(Collections.emptySet(), new TreeSet<>(Sets.newHashSet(1,2,3,4)), Collections.emptyList())}); + + data.add(new Object[]{refSegments, Arrays.asList("-4","1","3","2"), + new Tuple3<>(Collections.emptySet(), new TreeSet<>(Sets.newHashSet(1,2,3)), Arrays.asList(4))}); + + data.add(new Object[]{refSegments, Arrays.asList("1","3","4","-1"), + new Tuple3<>(Sets.newHashSet(new SimpleInterval("chr1:1100-1200")), new TreeSet<>(Sets.newHashSet(1,3,4)), Arrays.asList(1))}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forGetMissingAndPresentAndInvertedSegments") + public void testGetMissingAndPresentAndInvertedSegments(final List refSegments, + final List altArrangements, + Tuple3, Set, List> expected) { + + final Tuple3, Set, List> actual = + getMissingAndPresentAndInvertedSegments(refSegments, altArrangements); + Assert.assertEquals(actual._1(), expected._1()); + Assert.assertEquals(actual._2(), expected._2()); + Assert.assertEquals(actual._3(), expected._3()); + } + + //================================================================================================================== + + private static VariantContext makeTestComplexVariant(final SimpleInterval affectedRefRegion, final int svLen, + final String referenceBases, final String altSeqBases, + final List contigNames, + final List referenceSegments, final List altArrangement) { + final VariantContextBuilder builder = new VariantContextBuilder() + .chr(affectedRefRegion.getContig()).start(affectedRefRegion.getStart()).stop(affectedRefRegion.getEnd()) + 
.alleles(Arrays.asList(Allele.create(referenceBases, true), + Allele.create(SimpleSVType.createBracketedSymbAlleleString(CPX_SV_SYB_ALT_ALLELE_STR)))) + .id(CPX_SV_SYB_ALT_ALLELE_STR + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + affectedRefRegion.toString()) + .attribute(VCFConstants.END_KEY, affectedRefRegion.getEnd()) + .attribute(SVLEN, svLen) + .attribute(SVTYPE, CPX_SV_SYB_ALT_ALLELE_STR) + .attribute(CPX_EVENT_ALT_ARRANGEMENTS, String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, altArrangement)) + .attribute(CONTIG_NAMES, String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, contigNames)) + .attribute(SEQ_ALT_HAPLOTYPE, altSeqBases); + if (referenceSegments.isEmpty()) + return builder.make(); + else + return builder + .attribute(CPX_SV_REF_SEGMENTS, String.join(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR, + referenceSegments.stream().map(SimpleInterval::toString).collect(Collectors.toList()))) + .make(); + } + + /** Sorts both variant collections by coordinate against {@code refSeqDict}, then asserts that they have the same size and are pairwise equal, skipping {@code attributesToIgnore}; throws {@link AssertionError} reporting both sizes when the sizes differ. */ private static void assertVariantsAreEqual(final Iterable actual, final Iterable expected, + final List attributesToIgnore, final SAMSequenceDictionary refSeqDict) { + + final List actualList = SVVCFWriter.sortVariantsByCoordinate(Utils.stream(actual).collect(Collectors.toList()), refSeqDict); + final List expectedList = SVVCFWriter.sortVariantsByCoordinate(Utils.stream(expected).collect(Collectors.toList()), refSeqDict); + if (actualList.size() != expectedList.size()) { + throw new AssertionError("Two sources of variants are not of the same size. 
expected size: " + expectedList.size() + ", actual size: " + actualList.size()); + } + /* sizes are equal here, so compare element-by-element in sorted order */ for (int i = 0; i < actualList.size(); ++i) { + VariantContextTestUtils.assertVariantContextsAreEqual(actualList.get(i), expectedList.get(i), attributesToIgnore); + } + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/CpxVariantReInterpreterSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/CpxVariantReInterpreterSparkIntegrationTest.java new file mode 100644 index 00000000000..56f69df1a96 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/integration/CpxVariantReInterpreterSparkIntegrationTest.java @@ -0,0 +1,145 @@ +package org.broadinstitute.hellbender.tools.spark.sv.integration; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; +import org.apache.hadoop.fs.Path; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder; +import org.broadinstitute.hellbender.utils.test.MiniClusterUtils; +import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class CpxVariantReInterpreterSparkIntegrationTest extends CommandLineProgramTest { + + private static final String THIS_TEST_FOLDER = getTestDataDir() + "/spark/sv/integration/inputs/"; + + private static final String complexVCF = THIS_TEST_FOLDER + "CpxVariantReInterpreterSparkIntegrationTest_complex.vcf"; + private static final String assemblyAlignmentsAccompanyingComplexVCF = 
largeFileTestDir + "CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam"; + private static final File fastaReference = new File(b38_reference_20_21); + private static final String nonCanonicalChromosomeNamesFile = THIS_TEST_FOLDER + "Homo_sapiens_assembly38.kill.alts"; + private static final String outPrefix = "cpx_reinterpreted_simple"; + private static final String expectedOneSegmentVCF = getTestDataDir() + "/spark/sv/integration/outputs/cpx_reinterpreted_simple_1_seg.vcf"; + private static final String expectedMultiSegmentVCF = getTestDataDir() + "/spark/sv/integration/outputs/cpx_reinterpreted_simple_multi_seg.vcf"; + private static final List annotationsToIgnoreWhenComparingVariants = Collections.emptyList(); + + private static final class CpxVariantReInterpreterSparkIntegrationTestArgs { + final String outputDir; + + CpxVariantReInterpreterSparkIntegrationTestArgs(final String outputDir) { + this.outputDir = outputDir; + } + + String getCommandLine() { + return " -R " + fastaReference + + " -I " + assemblyAlignmentsAccompanyingComplexVCF + + " --non-canonical-contig-names-file " + nonCanonicalChromosomeNamesFile + + " --cpx-vcf " + complexVCF + + " --prefix-out-vcf " + outputDir + "/" + outPrefix; + } + } + + @DataProvider(name = "forCpxVariantReInterpreterSparkIntegrationTest") + private Object[][] createData() { + List data = new ArrayList<>(); + final File cpxVariantReInterpreterSparkIntegrationTest = createTempDir("CpxVariantReInterpreterSparkIntegrationTest"); + data.add(new Object[]{new CpxVariantReInterpreterSparkIntegrationTestArgs(cpxVariantReInterpreterSparkIntegrationTest.getAbsolutePath())}); + return data.toArray(new Object[data.size()][]); + } + + @Test(groups = "sv", dataProvider = "forCpxVariantReInterpreterSparkIntegrationTest") + public void testRunLocal(final CpxVariantReInterpreterSparkIntegrationTestArgs params) throws Exception { + final List args = Arrays.asList( new 
ArgumentsBuilder().add(params.getCommandLine()).getArgsArray() ); + runCommandLine(args); + + final String actualVCFForOneSegmentCalls = params.outputDir + "/" + outPrefix + "_1_seg.vcf"; + vcfEquivalenceTest(actualVCFForOneSegmentCalls, expectedOneSegmentVCF, annotationsToIgnoreWhenComparingVariants, false); + final String actualVCFForMultiSegmentCalls = params.outputDir + "/" + outPrefix + "_multi_seg.vcf"; + vcfEquivalenceTest(actualVCFForMultiSegmentCalls, expectedMultiSegmentVCF, annotationsToIgnoreWhenComparingVariants, false); + } + + @Test(groups = "sv", dataProvider = "forCpxVariantReInterpreterSparkIntegrationTest") + public void testRunHDFS(final CpxVariantReInterpreterSparkIntegrationTestArgs params) throws Exception { + MiniClusterUtils.runOnIsolatedMiniCluster(cluster -> { + + final List argsToBeModified = Arrays.asList( new ArgumentsBuilder().add(params.getCommandLine()).getArgsArray() ); + final Path workingDirectory = MiniClusterUtils.getWorkingDir(cluster); + + int idx = 0; + + // copy inputs + idx = argsToBeModified.indexOf("-I"); + Path path = new Path(workingDirectory, "hdfs.bam"); + File file = new File(argsToBeModified.get(idx+1)); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + argsToBeModified.set(idx+1, path.toUri().toString()); + path = new Path(workingDirectory, "hdfs.bam.bai"); // .bai + file = new File(file.getAbsolutePath() + ".bai"); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + + idx = argsToBeModified.indexOf("-R"); + path = new Path(workingDirectory, "reference.fasta"); + file = new File(argsToBeModified.get(idx+1)); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + argsToBeModified.set(idx+1, path.toUri().toString()); + path = new Path(workingDirectory, "reference.fasta.fai"); // .fasta.fai for fasta + file = new File(file.getAbsolutePath() + ".fai"); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + path = new 
Path(workingDirectory, "reference.dict"); // .dict for fasta + file = new File(file.getAbsolutePath().replace(".fasta.fai", ".dict")); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + + idx = argsToBeModified.indexOf("--non-canonical-contig-names-file"); + path = new Path(workingDirectory, "Homo_sapiens_assembly38.kill.alts"); + file = new File(argsToBeModified.get(idx+1)); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + argsToBeModified.set(idx+1, path.toUri().toString()); + + idx = argsToBeModified.indexOf("--cpx-vcf"); + path = new Path(workingDirectory, "CpxVariantReInterpreterSparkIntegrationTest_complex.vcf"); + file = new File(argsToBeModified.get(idx+1)); + cluster.getFileSystem().copyFromLocalFile(new Path(file.toURI()), path); + argsToBeModified.set(idx+1, path.toUri().toString()); + + // outputs, prefix with hdfs address + idx = argsToBeModified.indexOf("--prefix-out-vcf"); + path = new Path(workingDirectory, "test"); + argsToBeModified.set(idx+1, path.toUri().toString()); + + runCommandLine(argsToBeModified); + + final String actualVCFForOneSegmentCallsOnHDFS = path.toUri().toString() + "_1_seg.vcf"; + final String actualVCFForMultiSegmentCallsOnHDFS = path.toUri().toString() + "_multi_seg.vcf"; + + vcfEquivalenceTest(actualVCFForOneSegmentCallsOnHDFS, expectedOneSegmentVCF, annotationsToIgnoreWhenComparingVariants, true); + vcfEquivalenceTest(actualVCFForMultiSegmentCallsOnHDFS, expectedMultiSegmentVCF, annotationsToIgnoreWhenComparingVariants, true); + }); + } + + private static void vcfEquivalenceTest(final String generatedVCFPath, final String expectedVCFPath, + final List attributesToIgnore, final boolean onHDFS) throws Exception { + + List expectedVcs; + try (final VCFFileReader fileReader = new VCFFileReader(new File(expectedVCFPath), false) ) { + try (final CloseableIterator iterator = fileReader.iterator()) { + expectedVcs = Utils.stream(iterator).collect(Collectors.toList()); + } + } + + 
final List actualVcs = StructuralVariationDiscoveryPipelineSparkIntegrationTest + .extractActualVCs(generatedVCFPath, onHDFS); + + GATKBaseTest.assertCondition(actualVcs, expectedVcs, + (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqual(a, e, attributesToIgnore)); + } +} diff --git a/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam b/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam new file mode 100644 index 00000000000..a049c824ddc --- /dev/null +++ b/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab6e38e9d6f82e8893d5d690343eddbb8ba4a33c65b7594ae8ce704b9cb211c +size 139437 diff --git a/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam.bai b/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam.bai new file mode 100644 index 00000000000..d45b156b915 --- /dev/null +++ b/src/test/resources/large/CpxVariantReInterpreterSparkIntegrationTest_complex_assembly.bam.bai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9792174f4a3e1d0b3821a9b8b8275060135dfb438dec111285902d85a401fb58 +size 398472 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/CpxVariantReInterpreterSparkIntegrationTest_complex.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/CpxVariantReInterpreterSparkIntegrationTest_complex.vcf new file mode 100644 index 00000000000..ead20dec8ee --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/CpxVariantReInterpreterSparkIntegrationTest_complex.vcf @@ -0,0 +1,66 @@ +##fileformat=VCFv4.2 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 
+##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr20 2379056 CPX_chr20:2379056-2379968 CTGCTGAAGGTCACACAGCCAGTTTGGCCCAGAGATAACAGAACCCAGGACTCTACTGCCAGGCTATGATGCTCACTTCTAAACTGAGTATGGGCCGGGTGCAGTGGCTCTCACTTGTAATCTCGCACTTTGGGAGGCTGAGGTGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGCGAAACTGTCTCTACTTAAAATACACAAAAAATTAGCCGGGCATGGTGGCACACGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCCGGGAAGTAGAGGTTGCACCGAGCTGAGATCGTGCCGCTGCACTTCACCCTAGGTGACAGAGCGAGACCCTGTCTCAAAAAAAAAAAACGAGGAGTATGAATTATTATTTGGGGTCTCGAGCAGTGGGGCAGGCAGTGATTTTGATGCCCTGTCTTCTGGGAGCTCCCGTCCAAGCAAGCCCAGGATGAAAGATAAAAGGGCCCTGAATAAGACCATCATTAGTGGGGAGGAGCCTGACCCTGCATAGGCAGAAAAGCAGCTTCAGCAGGGCCCAGGGTCTACAGGGCAGCAGCCTGCAGGGTGTTGAGGAAGGAGCTGCCCTGGGCTGCTGCCAGATGTCAGTAATTATAAGGCTATGTGGGTCTGCAGACAGCTCATTAAAGACGCTTGATCTGCGTGAAACCAGGAAGCAGAACTCAGCCTCTTTGCCACTTTCTGTAAGAAATCCAACCACATCAAACACTTTGGGGTTTTTATGCAGTTTCCTGAATGTTGGGGTGATCCCAGGGAGCGGCCTCATCCTACTGTGCTCAGCACACGGTTGTCTCCTTACCCTTCCTTGTCTCTTTCCCTCGATTCTACCCCCTCCCACTTCCCCTTCCTCACTGGCCCAG . . 
ALIGN_LENGTHS=1401;ALT_ARRANGEMENT=1,2,3,UINS-4,1,3;CTG_NAMES=asm027854:tig00003;END=2379968;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=1401;SEGMENTS=chr20:2379056-2379257,chr20:2379257-2379963,chr20:2379963-2379968;SEQ_ALT_HAPLOTYPE=CTGCTGAAGGTCACACAGCCAGTTTGGCCCAGAGATAACAGAACCCAGGACTCTACTGCCAGGCTATGATGCTCACTTCTAAACTGAGTATGGGCCGGGTGCAGTGGCTCTCACTTGGAATCTCGCACTTTGGGAGGCTGAGGTGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGCGAAACTGTCTCTACTTAAAATACACAAAAAATTAGCCGGGCATGGTGGCACACGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATTGCTTGAACCCGGGAAGTAGAGGTTGCACCGAGCTGAGATCGTGCCGCTGCACTTCACCCTAGGTGACAGAGCGAGACCCTGTCTCAAAAAAAAAAAACGAGGAGTATGAATTATTATTTGGGGTCTCGAGCAGTGGGGCAGGCAGTGATTTTGATGCCCTGTCTTCTGGGAGCTCCCGTCCAAGCAAGCCCAGGATGAAAGATAAAAGGGCCCTGAATAAGACCATCATTAGTGGGGAGGAGCCTGACCCTGCATAGGCAGAAAAGCAGCTTCAGCAGGGCCCAGGGTCTACAGGGCAGCAGCCTGCAGGGTGTTGAGGAAGGAGCTGCCCTGGGCTGCTGCCAGATGTCAGTAATTATAAGGCTATGTGGGTCTGCAGACAGCTCATTAAAGACGCTTGATCTGCGTGAAACCAGGAAGCAGAACTCAGCCTCTTTGCCACTTTCTGTAAGAAATCCAACCACATCAAACACTTTGGGGTTTTTATGCAGTTTCCTGAATGTTGGGGTGATCCCAGGGAGCGGCCTCATCCTACTGTGCTCAGCACACGGTTGTCTCCTTACCCTTCCTTGTCTCTTTCCCTCGATTCTACCCCCTCCCACTTCCCCTTCCTCACTGGCCCAGTGAGGCTGCTGAAGGTCACACAGCCAGTTTGGCCCAGAGATAACAGAACCCAGGACTCTACTGCCAGGCTATGATGCTCACTTCTAAACTGAGTATGGGCCGGGTGCAGTGGCTCTCACTTGTAATCTCGCACTTTGGGAGGCTGAGGTGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGCGAAACTGTGCCCAG;SVLEN=913;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 12558793 CPX_chr20:12558793-12558810 AAAAAAAAAAAAAAAAAA . . 
ALIGN_LENGTHS=597;ALT_ARRANGEMENT=-chrX:99014092-99014129,UINS-101;CTG_NAMES=asm027960:tig00003;END=12558810;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=597;SEGMENTS=chr20:12558793-12558810;SEQ_ALT_HAPLOTYPE=AGACAAAGAAACAAACAAACAAAACAAAACTATATATATATATATATACACACACACACACACACACACATTATTAAAATTCAGATTTAAATAAACTGACTATAAAAAAGTACTTTTGAAACAAAAACTTTAATCATGATTATATATATTA;SVLEN=18;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 18675721 CPX_chr20:18675721-18675877 TATGTGTATATTTACACACATATATATGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACACATATATGTAAATATACCTATGTGTATATTTACACATATATATGTAAATATACCTATGTGT . . ALIGN_LENGTHS=564;ALT_ARRANGEMENT=1,UINS-28,1,UINS-64,1;CTG_NAMES=asm028012:tig00004;END=18675877;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=564;SEGMENTS=chr20:18675721-18675877;SEQ_ALT_HAPLOTYPE=TATGTGTATATTTACACACATATATATGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACACATATATATGTGTAAATATACCTATGTGTATATTTACACATATATATGTAAATATACCTATGTGTATGTTTACACATATATATGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATGTAAATATACCTATGTGTATGTTTACACATATATGTGTAAATATACCGATGTGTATGTTTACACATATATGTGTAAATATACCTATGTGTATGTTTACACATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTATGTTTACACATATATATGTGTAAATATACCTATGTGTGTGTTTACACATATATATGTGTAAATATACCTATGTGTGTGTTTACACATATATATGTAAATATACCTATGTGT;SVLEN=157;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 20269131 CPX_chr20:20269131-20269199 ATATATATATATATATACACACACACACACACACATACATATATGTATATACACACACATATATACATA . . 
ALIGN_LENGTHS=40;ALT_ARRANGEMENT=-chrX:137700299-137700331;CTG_NAMES=asm028026:tig00000;END=20269199;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=40;SEGMENTS=chr20:20269131-20269199;SEQ_ALT_HAPLOTYPE=ACACACACACACACACACACACACACACACACACA;SVLEN=69;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 23122561 CPX_chr20:23122561-23122996 CTGTCATGTCACCGTGTGGGAGGGCTTGCAGGTGAAGTGGTCTGGGAGGGGTCCCCCAGACAAAGCCAAGGTTCTGAGAGTTGGCCCGAACACTGCTGGATTCCACGGCGGGGGCTTGGGACAGCCGGACTCAGAGACAGTAGGAGGTGACAATGTATATACATCACAATGATCACAATATAGCCCCACACGTGCCTTCCCCACCAACAGTCAGCTTGCCATGTCCCTTCCCCGACATCCCCATCTTAGTCCCAACACAGAGGCTCCCTCTGCCCTGTGTCTGTTTAGCCAGGATGCCACGGTTCTTCCCCCTTCTCCCATGTCCTCCTCCACACACCCTTCAAGGGCAACCCCAGAGATTTCTCCTCCATGATTTCTAAAGTCAGTGGTAAATCTGTTCCTCTTTAAAATCTCATCACACTGTTCACCACTTCCC . . ALIGN_LENGTHS=876,876;ALT_ARRANGEMENT=1,2,-1;CTG_NAMES=asm028059:tig00000,asm028059:tig00001;END=23122996;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=876;SEGMENTS=chr20:23122561-23122596,chr20:23122596-23122666,chr20:23122666-23122996;SEQ_ALT_HAPLOTYPE=CTGTCATGTCACCGTGTGGGAGGGCTTGCAGGTGAAGTGGTCTGGGAGGGGTCCCCCAGACAAAGCCAAGGTTCTGAGAGTTGGCCCGAACACTGCTGGATTCCACTTCACCTGCAAGCCCTCCCACACGGTGACATGACAGC;SVLEN=436;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr20 28561412 CPX_chr20:28561412-28561576 AACATCATATGTGACAGGCTGTCATAGTCACCATTCCCATGACCATAGTCATGGGAATCTTGCTCGTGTTACAGGGAAGCACTGTCCAACATCATATGTGACAGGTTCTCATCGTCACCATCATCATGGGAATCTTGTGCATGTTACAGGGAAGCATTGTCCTAC . . 
ALIGN_LENGTHS=416;ALT_ARRANGEMENT=UINS-74,1,UINS-74;CTG_NAMES=asm008294:tig00014;END=28561576;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=416;SEGMENTS=chr20:28561413-28561575;SEQ_ALT_HAPLOTYPE=AACATCATATGTGACAGGCTCTCATAGTCACCATCATCACAGAAATCTCGCGCATGTTACAGAGAAGCACTGTCCTACATCATATGTGACAGGCTCTCATAGTCACCATTCCCATGACCATAGTCATGGGAATCTTGCTCATGTTACAGGGAAGCACTGTCCAACATCATATGTGACAGGCTCTCATCGTCACCATCATCATGGGAATCTTGTGCATGTTACAGGGAAGCATTGTCCTACATCGTATGTGACAGGCTCTCATAATCACTATCATCATGGGAATCCTGTGCATGTTACAGGAAAGAACTGTCCTGC;SVLEN=165;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 38123803 CPX_chr20:38123803-38124181 ACCTATTATATATAATATACCTATTATATATAAGATACCTATTATATATAATATACCTATTATATATAAGATACCTATTATATATAATATACATATACCTATTATATATATAATATACCTATTATATATATATAATATACCTATTATATATATAATATACCTATTATATATATAATATACCTATTATATATATAATATACCTATTATATATAATACACATTATATATAATATATAACACACATTATATATAATATATAACACACATTATATATTATATATAATACACATTATATATTATATATAATACACATATATTATATATAATACACATTATATATTATATATAATACACATATATTATATATAATACACATTATA . . ALIGN_LENGTHS=1516,1516;ALT_ARRANGEMENT=1,2,3,UINS-5,1,UINS-531,3,4,3,4;CTG_NAMES=asm028411:tig00000,asm028411:tig00001;END=38124181;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=1516;SEGMENTS=chr20:38123803-38123868,chr20:38123868-38124004,chr20:38124004-38124014,chr20:38124014-38124181;SEQ_ALT_HAPLOTYPE=ATACCTATTATATATAATATACCTATTATATATAAGATACCTATTATATATAATATACATATACCTATTATATATATAATATACCTATTATATATATATAATATACCTATTATATATAATATACCTATTATATATATAATATACCTATTATATATATAATATACCTATTATATATATAATATACCTATTATATATAATATACCTATTATATATAAAATATACCTATTATATATAATATACCTATTATATACATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATAATATATTATATATAATATGCATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATAATATACATATTATATATTATATATTATATACATATTATATATTATATATTATATACATATTATATATTATATACATATTATATATTATATATTATATACATATTATACATTATATATATCTAAAATATATAATACACATTATATATTATATAATACACATTATATATAATATA
TAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATAATACACATTATATATTATATATAATACACATTATATATATAATACACATTATATATTATATATAATACATAATACACATTATATATTATATATAATACACATTATATATAATATATAACACACATTATATATAATATATAACACACATTATATATTATATATAATACACATTATATATTATATATAATACACATATATTATATATAATACACATTATATATTATATATAATACACATATATTATATATAATACACATTATA;SVLEN=379;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr20 38653054 CPX_chr20:38653054-38653283 TGGTGGTGGTGGTGATGGAAATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGATGGTGGTGGTGGTGGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGATGGTGGTGGTGGTGGTGATGATGGTGATG . . ALIGN_LENGTHS=1534;ALT_ARRANGEMENT=1,2,3,4,3,1,2,3,4,5,2,3,4,5;CTG_NAMES=asm028418:tig00000;END=38653283;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=1534;SEGMENTS=chr20:38653054-38653113,chr20:38653113-38653145,chr20:38653145-38653179,chr20:38653179-38653273,chr20:38653273-38653283;SEQ_ALT_HAPLOTYPE=TGGTGGTGGTGGTGATGGAAATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGATGGTGGTGGTGGTGGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGGTGATAATGATGGTAGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGGTGATGGAAATGATGATGATGTTAATTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGATGGTGATGATGATTATGATGGTGGTGGTGGTGGTGGTGGTGCTGGTGATAGTGGTGGTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGGTGTTGGTGGTGCTGGTGATGATAATCATGCTGGTGGTGGTGGCGTTGATGATGGTGACAGTAGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGATGGTGGTGGTGGTGGTGATGATGGTGATG;SVLEN=230;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 47895195 CPX_chr20:47895195-47895581 
AAGGGTCCGGAAGCTGGCGACCCAGGGAGGAGAGGTCTTGTTCTATTGTGAGGGTCGTGGAGCTGCCGACCCAGGGAGGAGCAATTTTGTTGTAATTTGAGGGTCCGGAAGCTGGCGACCCAGGGAGGAGAGGTCTTGATCCGGTCTGAGGGTCGTGGAGCTACAGACCCAGGGTGGAGCAATGTTGTTGTAGTTCGAGGGTCTGGTAGCTGGCAACTTGGGGAAGACAAGTCTTGTTCGAGTTCGAGGGTCGTGGAGCTGCAGACCCAGGGAGGAGCCATTTTGTTGTAGTTCAAGGGTCCGGAAACTGGCGACCCAGGGAGGAGAGGTCTTGTTCTAGTTTCAGGGTCGTGGAGCTTCTGACCCAGGGAGGAGCATTGTTGTTTT . . ALIGN_LENGTHS=289;ALT_ARRANGEMENT=2;CTG_NAMES=asm028508:tig00005;END=47895581;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=289;SEGMENTS=chr20:47895195-47895294,chr20:47895294-47895482,chr20:47895482-47895581;SEQ_ALT_HAPLOTYPE=AAGGGTCCGGAAGCTGGTGACCCAGGGAGGAGAGGTCTTGATCTGGTCTGAGGGTCGTGGAGCTACAGACCCAGGGAGGAGCAATGTTGTTGTAGTTCGAGGGTCCGGTAGCTGGCAACCCGGGAAAGAGAAGTCTTGTTCGAGTTTGAGGGTCGTGGAGCTGCAGACCCAGGGAGGAGCAATTTTGTTGT;SVLEN=387;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 51740560 CPX_chr20:51740560-51740561 AT . . ALIGN_LENGTHS=1104,1104;ALT_ARRANGEMENT=-chr18:11642876-11642927,UINS-496;CTG_NAMES=asm028558:tig00002,asm028558:tig00003;END=51740561;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=1104;SEQ_ALT_HAPLOTYPE=AATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGT;SVLEN=2;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr20 51740560 CPX_chr20:51740560-51741035 
ATTTTGTGTTGTTGTTTTTGTTTTTTGAGACAAGGTCTCATTCTGTCACCCAGGCAGGACTGTGGTGGCACCATCATGGCTCAGCGCAGCCTCCTTTTCCCCAGGCTCAAGTGATCCTCTTGCCTCAGCCTCCCACGTGGCTGGGACTACAGGTGTGTACCACCACTCCCGGATAATTTTTTTTATTTTTTATTTTTAGTAAAGACAGTCTCACTATGTTGCCCAGGCTGGTCTCCAACTCCTGGTCTCAAGCAATCCTCCCAGTTCAGCCTCTCAAAGTGCTGGGATTACAGATGTGAGCCACAATACCCGGCCCCAATTCTAATGTTTAAAGAGTACAGTCTACACCTTAAAGCCTGCATTTTATCATCCTGTCCTCACTGCTCTGACTTCTTTACAGTTGTGCTGTCCACCTTGGCGGCTTCTACCACATGTGGCTATTTTAAGTTTCAATTAATTAAAATTAAATTTTAATT . . ALIGN_LENGTHS=895,895;ALT_ARRANGEMENT=-chr18:11642876-11642927,UINS-496,1,UINS-49;CTG_NAMES=asm028558:tig00000,asm028558:tig00001;END=51741035;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=895;SEGMENTS=chr20:51740561-51741034;SEQ_ALT_HAPLOTYPE=AATTAGGCTGGGCACAGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCGGGAGTTCAAGACCAGCCTAACCAACATGAGGAAACCCCGTCTCTACTAAAAATACAAAATTAGATGGGCGTGGTGGCGCATGCCTGTAATTCAAACTACTTGGAAGGCTGAGGCAGGAGAATTGCTTGAACCCAGGAGACAGAGGTTGTGGTAAGCCAAGATCATGCCATTGTACTCCAGCATGGGCAACAAGAGTGAGACTCCATCTCAAAAAAAAAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTAATCCCAGCTACCCTGGAGACTGAGGCAGAAGAATCGCTTGAACCCAGGAGGCGGAGATTGCAGTGAGCCAAGATTACGCCACTGCACTCCAGCCTGGGCACCAAGAGCAAAACCCTGTCTCAAAAAAATTAACAAATAAAAAGATTTCTGTCTGCCACACGGCTGGTCCATGTGTAAAGACACATTCCTGTTGGTTTTATGTGTCTTGAATTCTAATGGGTTTTGTGTTGTTGTTTTTGTTTTTTGAGACAAGGTCTCATTCTGTCACCCAGGCAGGACTGTGGTGGCACCATCATGGCTCAGCGCAGCCTCCTTTTCCCCAGGCTCAAGTGATCCTCTTGCCTCAGCCTCCCACGTGGCTGGGACTACAGGTGTGTACCACCACTCCCGGATAATTTTTTTTATTTTTTATTTTTAGTAAAGACAGTCTCACTATGTTGCCCAGGCTGGTCTCCAACTCCTGGTCTCAAGCAATCCTCCCAGTTCAGCCTCTCAAAGTGCTGGGATTACAGATGTGAGCCACAATACCCGGCCCCAATTCTAATGTTTAAAGAGTACAGTCTACACCTTAAAGCCTGCATTTTATCATCCTGTCCTCACTGCTCTGACTTCTTTACAGTTGTGCTGTCCACCTTGGCGGCTTCTACCACATGTGGCTATTTTAAGTTTCAATTAATTAAAATTAAATTTTAATTTAATTAATTAAAAATAAATTTTAATTAATTAATTAAAAATAAATTTTAAT;SVLEN=476;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr20 54849491 CPX_chr20:54849491-54849615 
CAAATCTCATGTGAAATGTATCCCCAGTGTGGAGGGGGCAGATCCTCATAATGGCTTGGGCCCTTCCATGGTAATAGTGAGTCTTGCTCTGTAGTTCATAGAGAGCTGATTGTTAAAGGAGTCTG . . ALIGN_LENGTHS=88;ALT_ARRANGEMENT=UINS-36,-chr14:58474127-58474172,UINS-54;CTG_NAMES=asm028586:tig00000;END=54849615;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=88;SEGMENTS=chr20:54849491-54849615;SEQ_ALT_HAPLOTYPE=CCAAATCTCATGTTGAAATGTAATCCCCAGTGTTGGAGGGGGGCAGATCCCTCATGAATGGCTTGGTGCCCTTCCCATGGTAATGAGTGAGTTCTTGCTCTGTTAGTTCATGAGAGAGCTGATTGTTTAAAGGAGTCTGG;SVLEN=125;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 58695019 CPX_chr20:58695019-58695020 GT . . ALIGN_LENGTHS=82;ALT_ARRANGEMENT=UINS-72,chr20:58695237-58695339,UINS-83,chr20:58695287-58695340,UINS-12,chr20:58695229-58695328,UINS-27,chr20:58695237-58695320;CTG_NAMES=asm028638:tig00002;END=58695020;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=82;SEQ_ALT_HAPLOTYPE=TTGCGTGTGTATGAGTGCTGGTGTGTGCATGAATGGTGTGTGTGGTGTGTTTATGTGTGGTGTGTGTTGCTGTGTGAGTGCTGGTGTGTGCATGAGTGGCGTGTGTGTGGTGTGTGTTGCTGTGTGTGTATGAGTGTTGGTGTGTGCATGAGTGGTGTGTGTGTGCTGTGTGTATGTGTGGTGTGTGTTGCTGTGTATGAGTGTTGGTGTGTGCATGAGTGGTGTGTGTGTGCTGTGTGTACGTGTGTGTGTTGCTGTGTGTGTGTTTGAGTGCTGGTGTGTGCATGAGTGGTGTGTGTGTGGTGTGTGTGGTGTCTGTTGCTGTGTGTGTTTGAGTTCTGGTATGTGCATGAGTGGTGTGTGTGTGTGCTGTGTGTGTGGTGTGTTGCTGTGTGTGTATGAGTGCTGGTGTGTGCACGAGTGGTGTGTGTATTTGCGGTGTGTGTTGCTGTGTATATGTGAGTGCTGGTGTGTGCATGAGTGGCGTGTGTGGTGTGTATGTGTGGTGTGTGTTGCTGTTTGTGTTTGAGTGCTGATGTGTGCATGAGTGG;SVLEN=2;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 61375650 CPX_chr20:61375650-61376102 TGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTAGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGCTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGCTCTTGGTGCTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATACTGTGGTTTGTGGATGGTAGTGTGATGGTCTTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGG . . 
ALIGN_LENGTHS=1559;ALT_ARRANGEMENT=1,2,3,2,1,2,3;CTG_NAMES=asm028687:tig00001;END=61376102;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=1559;SEGMENTS=chr20:61375650-61375759,chr20:61375759-61375874,chr20:61375874-61376102;SEQ_ALT_HAPLOTYPE=TGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTAGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGCTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGCTCTTGGTGCTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGATGATAGTGGGGTTTATTGATGGTAGTGTGATGGTCTTGGTGGTGCTGATAATGGTGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTGGCGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGGTGGTGATGATGTAGTTTGTTGATGGTAGCGTGATGTTCTTGGTGCTGGTGGTGGTGATGATGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGGTGGTGGTGATAGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGATGCTGGTGATGGTATTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTCGTGGTGGTGGTGTGGTTTGTCGATGGTAGTGTGGTGGTCTTGGTGCTGGTGGTTGTCGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGGTGATGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTAGTGATGGTGTGGTTTGCTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATACTGTGGTTTGTGGATGGTAGTGTGATGGTCTTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGG;SVLEN=453;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 61919906 CPX_chr20:61919906-61920109 TCGTGATTATGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGCGGTGATTGCATGGAAGTGTGGTGTCACAGTGATTGCGTGGAAGCGTGTCGTGATTGTGTGGAAGCATGGTATCGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCATGTTGTGATTG . . 
ALIGN_LENGTHS=947;ALT_ARRANGEMENT=1,2,3,UINS-177,1,2,2,3;CTG_GOOD_NONCANONICAL_MAPPING=chr20_KI270869v1_alt,32261,-,1273M69I860M17D47M6D52M4D37M6D50M6D42M6D76M3I47M6D25M3I102M3D79M76H,60,193,2065;CTG_NAMES=asm028707:tig00000;END=61920109;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=947;SEGMENTS=chr20:61919906-61919908,chr20:61919908-61920054,chr20:61920054-61920109;SEQ_ALT_HAPLOTYPE=TCGTGATTATGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGCGGTGATTGCATGGAAGTGTGGTGTCACAGTGATTGCGTGGAAGCGTGTCGTGATTGTGTGGAAGCATGGTATCGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCATGTTGTGATTGTGTGGAAGCGTGGTGTGATTGTGTGGAAGCATGGTATCGTGATTGTGGAAGCGTGGTATCGCGGCGATTGTGTGGAAGCGTGGTGTCGCAGTGATTGCGTGGAAGCATGTTGTGATTGTGTGGAAGCGTGGTATCGTGATTGTGTGGAAGCATGGTGTCGTGATTGTGTGGAAGCATGTCGTGATTGTGTGGAAGTGTGATGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATTGGAAGTGTGGTGTCACGCTGATTGCATGGAAGTGTGTTGTGATTGTGTGGAAGCGTGATATCGCAGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATCGGAAGCGTGGTGTTGCGGTGATTGCATGGAAGCATGTTGTGATTGTGTGGAAGCATGGTATCGTGATTGTCTGGAAGCATGGTGTCATGGTGATTGGAAGTGTGTCGTGATTG;SVLEN=204;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 62452038 CPX_chr20:62452038-62452236 GGGGAGAGAGGGAGGGTGAGAGTGGGGAGAGGGAGAGGGGAAGAAAGAGAGAAGGAGAGCGGGGAGAGAGAGGGAGAAGGGGGAGGAAGAGAGGGAGAAGGAGAGAGAGAAGGAGGGAGAGAGGGAGAGGGAGAGAAAGAGAAAGGGAAGGAGAGAGGGAGGGAAGGAAAGAGGGAGGGAGAGAAGGAAGCAGAGAGGG . . 
ALIGN_LENGTHS=235;ALT_ARRANGEMENT=1,UINS-148,1,2;CTG_NAMES=asm028732:tig00001;END=62452236;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=235;SEGMENTS=chr20:62452038-62452041,chr20:62452041-62452087,chr20:62452087-62452236;SEQ_ALT_HAPLOTYPE=GGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGAGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGAGAGGGAGGGTGAGAGTGGGGAGAGGGAGAGGGGAAGAAAGAGAG;SVLEN=199;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 63092255 CPX_chr20:63092255-63094246 GGCCCAGGTTCCCGGGGCTGCGTGGGAGACACAGAAGTGGGGGCACCTCTGCAGCACCCACATCCTGCCGACGCCATGCACGTGAGTGTGCATGTGGGGTGGGGGTGCGCCAGGAAGGCAAAAATCCCAAATGTCAGCGCCCGGTGATTTGAGGAGTTCAGACAAAGCCTTCCCTGGGGCTGTCCTCGGCCCACTTCCCCATCATCCCAATGTCCCTGCAGCCCTTGAAATGGCTCGGATGTGGGGCAGGAGGGCTGGGCGGATGCTGATCTGAGGCCGAGGTTGATGGATTCCGGCTCCCCCAGTGCCGATCCGGCTGCTGCCCCACGTGGGGCTTTGAGGCTGCGGCAACAGAGCCGGTGGGACACGCTCCTGATGCCAGACCCAGGGGTAAGAGCTGCCCCTGCTCCACGGTCTGCGCCACGATGGCTGCCAGAGCGAAAATGGGAGGGAGAGAAGCGCTGTCAGGGCCTCCCAGGGAGGGTGTGTGGCGTGTGGCTTGCTAAGCACCCTCTTTTCTGATCCCGCCCTGAGCAGCTCCCTGGGACTTTTGTTGGTCAAAGCTTCAGCTGTGGTCAGAGGATCCAACGCTGCCGTCAACAGGCAGCTGCTGCCGTATGGCTGCAGATGTGGTTTCTGGAACAGACGGGCCCAGCCCTGCAAGGTGGGCTGGCTCTTTGGCTCTCGAGAGCACCATGGCACAGTGCTGATCTGGGTCCACACGGCCCAGCCACCCGGTCAGCTCGCCTGGTTCTTACATTTCCACTCCACAAACAGAACCCAGACCCAGATCTGCTTCCTCCGCCTCCCGCCCTCCTCGCCCCGCACCGTTTCTTGTCCCACAGAAGCTGTCATGGGTGCTGTGTACCCCACAGGGCGGGAGAGAAAGGTGTGTCCAGGAGCAATCTCGGGAAACAGGGGGCAGAGGATGCGGTGCAGAGCCGAGCTCCGGAGCCACCTTCCTGACCGCCAGAGCCTGACTGCATCCGTATGGCATGAGGTCGGTATCTAGGAGGCCATACTCTTTACCTAACATCCAGGACACAGGAAAAGGACGGAAACAGAAAAAAATACATAAATAAATTCAAGATGCATTTTAAAGAACATGAAAAACTTTAAAAGGACGGGTAATTGTCTAACATTTTGTTAATTCAATCAGGCTTGCAATTTAAAAATAGCTTTGCCTTTTACTGAAAAAGAAATTAATTATAATTTCTTACTGAATCCAGGGAGAGAATTCGGGACTAGGGGGTTCCCTGAGAAGGTCTTCTTACACGTTACCCTCCAATGTCCTGTGGCCAAGGGAGCTTCAACCCCCTCTTTACGAGGGTTCTGGTGGCTGGAGAACTCAGAGACGGCTCTAAAAAAATTGCAAGCTGGCCAGGCACGGTGGCTCACCCCCGTATTCCCAGCACTTTAGGAGGCTGAGGCAGGCAGATCACCTGAGGTCAGGAGTTTGAGACCATC
CTGGCCAACACGGTGAATCCCCGTCTCTACTAAAAATACAAAAATTAGCTGGACGTGGTGGCGCACGCCTGTAATCCCAGCTACTCAGGGGGCTGAGGCAGGAGAATCACTTGAACTTGGGAGGTGGAGGTCGCAGTGAGCCAAGATGGTGCCATTGCACCCCAGCCTGGGCGACAGAGCAAGACTTCATCTCAAAAAAATAAAAAAATGTAAACATCGCAAGCTGCACCCAGGTGTGAATCTGGAGGGCGCCTCCTTGGAAAACTTGGCTGGTGACCGCACCCTCAGTTCAAGCTCACCGGCCGCCCTTCCCTGTGCACACACGTGAGCTCTCCAGTATGTTTGGAATATCCCAGAGCCGCTGTCTCCTGGCTGCCCGGCCTCTGTGAGCCGCACTCCAGCCTGAAGGCCAAGACGGCTGCTCTGGAGAAAGAACTTCCCCTGGTGCTGAAGTCCCGAAGGAGAGCCCCATGGGCATGGGGAGGGGAGGGTGCCTCGCCCCCAAGGCTGCGCTGCTCCGCGGCC . . ALIGN_LENGTHS=942;ALT_ARRANGEMENT=UINS-70,1;CTG_NAMES=asm028762:tig00002;END=63094246;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=942;SEGMENTS=chr20:63092256-63093346,chr20:63093346-63094246;SEQ_ALT_HAPLOTYPE=GGCCCAGGTTCCCGGGGCTGCGTGGGAGACACAGAAGTGGGGGCACCTCCGCAGCACCCACATCCTGCCGATGCCCAGGTTCCCGGGGCTGCGTGGGAGACACAGAAGTGGGGGCACCTCCGCAGCACCCACATCCTGCCGACGCCATGCACGTGAGTGTGCATGTGGGGCGGGGGTGCGCCAGGAAGGCAAAAATCCCAAATGTCAGCGCCCGGTGATTTGAGGAGTTCAGACAAAGCCTTCCCTGGGGCTGTCCTCGGCCCACTTCCCCATCATCCCAATGTCCCTGCAGCCCTTGAAATGGCTCGGATGTGGGGCAGGAGGGCTGGGCGGATGCTGATCTGAGGCCGAGGTTGATGGATTCCGGCTCCCCCAGTGCCGATCCGGCTGCTGCCCCACGTGGGGCTTTGAGGCTGCGGCAACAGAGCCGGTGGGACACGCTCCTGATGCCAGACCCAGGGGTAAGAGCTGCCCCTGCTCCACGGTCTGCGCCACGATGGCTGCCAGAGCGAAAATGGGAGGGAGAGAAGCGCTGTCAGGGCCTCCCAGGGAGGGTGTGTGGCGTGTGGCTTGCTAAGCACCCTCCTTTCTGATCCCGCCCTGAGCAGCTCCCTGGGACTTTTGTTGGTCAAAGCTTCAGCTGTGGTCAGAGGATCCAACGCTGCCGTCAACAGGCAGCTGCTGCCGTATGGCTGCAGACGTGGTTTCTGGAACAGACGGGCCCAGCCCTGCAAGGTGGGCTGGCTCTTTGGCTCTCGAGAGCACCACGGCACAGTGCTGATCTGGGTCCACACGGCCCAGCCACCCGGTCAGCTCGCCTGGTTCTTACATTTCCACTCCACAAACAGAACCCAGACCCAGATCTGCTTCCTCCGCCTCCCGCCCTCCTCGCCCCGCACCGTTTCTTGTCCCACAGAAGCTGTCATGGGTGCTGTGTACCCCATAGGGCGGGAGAGAAAGGTGTGTCCAGGAGCAATCTCGGGAAACAGGGGGCAGAGGATGCGGTGCAGAGCCGAGCTCCGGAGCCACCTTCCTGACCGCCAGAGCCTGACTGCATCCGTATGGCATGAGGTCGGTATCTAGGAGGCCATACTCTTTACCTAACATCCAGGACACAGGAAAAGGACGGAAACAGAAAAAAATACATAAATAAATTCAAGATGTC;SVLEN=1992;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 63353949 CPX_chr20:63353949-63354272 
TCGGGGGGGCTGCGGTCCTGGGAGGTCTGTGGTCCTGGGGGGCTGTGGTCCTGGCGGGGGGGCTGCGGTCCTAGGGGTGTTGTAGTCCTAGGGGGCTGTGGTCAGGCAAGAACTGTGATCCTTGTGGGGGCTGCAGTTCTGCAGGAGGCACTGTAGCCCTAGGGTGGGATGAAGTCCTGGGAGGACTGTGGTCCTGGCAGGGAATGTGGTCCTGGGGGCCTGTGGTCCTGGGGGGCTGTGGTCCTGGGGGGGCTGTGGTCCTGGGGGGCTGTGGTCCTAGGGGGCTGTGGTCCTAAATGGGGCTGTGGTCCTGGGGGGCTGTAG . . ALIGN_LENGTHS=81;ALT_ARRANGEMENT=2;CTG_NAMES=asm028777:tig00001;END=63354272;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=81;SEGMENTS=chr20:63353949-63354002,chr20:63354002-63354205,chr20:63354205-63354272;SEQ_ALT_HAPLOTYPE=TGGGGGGGAGCTGCTGTCCTAGGGGTGTTGTAGTCCTAGGGGGCTGTGGTCAGGCAAGAACTGTGATCCTTGTGGGGGCTGCAGTTCTGCAGGAGGCACTGTAGCCCTAGGGTGGGATGAAGTCCTGGGAGGACTGTGGTCCTGGCAGGGAATGTGGTCCTGGGGGCCTGTGGTCCTGGGGGGCTGTGGTCCTGGGGGAGCTGTGG;SVLEN=324;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr20 64096905 CPX_chr20:64096905-64097041 CCACCATCATCACCATCACCACTATCACCACCACCACCATCATTACCATCATCATCACGACCATCACCACCATCATCACCATCACCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACC . . ALIGN_LENGTHS=166;ALT_ARRANGEMENT=1,1,UINS-166;CTG_NAMES=asm028821:tig00001;END=64097041;HQ_MAPPINGS=0;MAPPING_QUALITIES=54;MAX_ALIGN_LENGTH=166;SEGMENTS=chr20:64096905-64097041;SEQ_ALT_HAPLOTYPE=CCACCATCATCACCATCACCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACTGTCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACCATCATCACAGTCATCACCGCCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACCATCATCACAATCATCACCGTCACCATCATCACCATCCTCACTATCACCACCACCACCATCATCACCATCACTATCATCACAGTCATCACCGTCACCATCATCACCATCCTCACTATCACCACCACTACCATCATCATCACATTCATCATCACTATTACCATCATCATCACCACCATCACCATCACTATCACCACCATCATTACATTTGTCACCATCACCACCATTATCACCATCACCGCTATCACCACCACCACCGTC;SVLEN=137;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr21 21264944 CPX_chr21:21264944-21265096 TATATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTCTGTGTATATATGTACACATATATACTATATATGTGTATGTGTATATATATACACACATATATTATATAT . . 
ALIGN_LENGTHS=316,316;ALT_ARRANGEMENT=1,2,3,2,1,2,3;CTG_NAMES=asm029034:tig00000,asm029034:tig00001;END=21265096;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=316;SEGMENTS=chr21:21264944-21264988,chr21:21264988-21265052,chr21:21265052-21265096;SEQ_ALT_HAPLOTYPE=TATATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATATGTATATATACACATATATATTATATATATATGTGTCTGTATATATATACACATATATATTATATATATGTGTCTGTGTATATATATACACATATATATGTGTCTGTGTATATATGTACACATATATACTATATATGTGTATGTGTATATATATACACACATATATTATATAT;SVLEN=153;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr21 23428920 CPX_chr21:23428920-23429023 TTTATATAAATATATATAAATATATAATATATAATAATATAATATAATATATATATAATATAATATAATATAATATATAATATATATTACATAATATATTATAT . . ALIGN_LENGTHS=1182,1182;ALT_ARRANGEMENT=UINS-84,2,3,UINS-5,2,2,3;CTG_NAMES=asm029052:tig00000,asm029052:tig00001;END=23429023;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=1182;SEGMENTS=chr21:23428920-23428968,chr21:23428968-23428998,chr21:23428998-23429023;SEQ_ALT_HAPLOTYPE=TATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATAATATATATAATATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATATTATATAATATATATTACATAATATATTATAT;SVLEN=104;SVTYPE=CPX;TOTAL_MAPPINGS=2 +chr21 26001843 CPX_chr21:26001843-26002386 ATCACATCTAGGCATGGCTTTGAGCTTCACAGGCCAGAAGCTGCTCTGTCAGTAACTCAGAGGGGATATGGTCCCTAGACCCAATTCACTTTTAATAAATCAGGGTGCCAGTCCTTTAATTTTGGAAGATAAATAAAATTGCGAAGTAGATGGGATACTTACGTCAACAAGTTGGTTGGGTTTCATCCTGTAAGAGTATTGTGTACAGAAATCCAATTTGCTTTGGGAGCGTGGACTTTGAAAGCAGCAGTACTGAGTGAGATGGCCTTAATGATGGATGATAATGAGGTTGATGCTTTCGTAGCCACGTTTCCATCTGAAAACCACGTGAAGAGTGTCAGATCATCTTCATGTCCGTTGCATGGCGCATTTCTCCTGCGGAGTGTTTGACACCTTCTGGGCTTAGTGCTGACTGTCCCTCCTGTTGACATCATCGTGATGGTAGCCACCTCTGGTTTTACCAGTACTTTATTGCATCTACTGAAAGAGCAACGTGTATAGGGAAGTAAAACAATAGTTAGTCCCCATTTGTATTGACATTGTG . . 
ALIGN_LENGTHS=1370;ALT_ARRANGEMENT=-1;CTG_NAMES=asm029075:tig00000;END=26002386;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=1370;SEGMENTS=chr21:26001844-26002384,chr21:26002384-26002386;SEQ_ALT_HAPLOTYPE=ACAATGTCAATACAAATGGGGACTAACTATTGTTTTACTTCCCTATACACGTTGCTCTTTCAGTAGATGCAATAAAGTACTGGTAAAACCAGAGGTGGCTACCATCACGATGATGTCAACAGGAGGGACAGTCAGCACTAAGCCCAGAAGGTGTCAAACACTCCGCAGGAGAAATGCGCCATGCAACGGACATGAAGATGATCTGACACTCTTCACGTGGTTTTCAGATGGAAACGTGGCTACGAAAGCATCAACCTCATTATCATCCATCATTAAGGCCATCTCACTCAGTACTGCTGCTTTCAAAGTCCACGCTCCCAAAGCAAATTGGATTTCTGTACACAATACTCTTACAGGATGAAACCCAACCAACTTGTTGACGTAAGTATCCCATCTACTTCGCAATTTTATTTATCTTCCAAAATTAAAGGACTGGCACCCTGATTTATTAAAAGTGAATTGGTTCTAGGGACCATATCCCCTCTGAGTTACTGACAGAGCAGCTTCTGGCCTGTGAAGCTCAAAGCCATGCCTAGATGTGAG;SVLEN=544;SVTYPE=CPX;TOTAL_MAPPINGS=1 +chr21 46069065 CPX_chr21:46069065-46069209 CACACGTGTGTGCATGTGTGTGCATGTGTGCCTGGGTGTGTGTGCACGCCTGTGTGAGCATGTGTGCCTGAATGTGTGTGCATGTGTGTGCAAGTTCATGTGTGTGCCTGTGTGCATGTGTGCCTGTGTGTACGTGTGTGTGCCA . . ALIGN_LENGTHS=154,154;ALT_ARRANGEMENT=UINS-59,1;CTG_NAMES=asm029362:tig00001,asm029362:tig00002;END=46069209;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=154;SEGMENTS=chr21:46069066-46069156,chr21:46069156-46069209;SEQ_ALT_HAPLOTYPE=CCTAGGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGTGCATGTGTGCCTGGGTGTGTGTGCACGCCTGTGTGAGCATGTGTGCCTGGATGTGTGTGCATGTGTGTGCAA;SVLEN=145;SVTYPE=CPX;TOTAL_MAPPINGS=2 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/Homo_sapiens_assembly38.kill.alts b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/Homo_sapiens_assembly38.kill.alts new file mode 100644 index 00000000000..89d5544c986 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/inputs/Homo_sapiens_assembly38.kill.alts @@ -0,0 +1,3341 @@ +chr1_KI270706v1_random +chr1_KI270707v1_random +chr1_KI270708v1_random +chr1_KI270709v1_random +chr1_KI270710v1_random 
+chr1_KI270711v1_random +chr1_KI270712v1_random +chr1_KI270713v1_random +chr1_KI270714v1_random +chr2_KI270715v1_random +chr2_KI270716v1_random +chr3_GL000221v1_random +chr4_GL000008v2_random +chr5_GL000208v1_random +chr9_KI270717v1_random +chr9_KI270718v1_random +chr9_KI270719v1_random +chr9_KI270720v1_random +chr11_KI270721v1_random +chr14_GL000009v2_random +chr14_GL000225v1_random +chr14_KI270722v1_random +chr14_GL000194v1_random +chr14_KI270723v1_random +chr14_KI270724v1_random +chr14_KI270725v1_random +chr14_KI270726v1_random +chr15_KI270727v1_random +chr16_KI270728v1_random +chr17_GL000205v2_random +chr17_KI270729v1_random +chr17_KI270730v1_random +chr22_KI270731v1_random +chr22_KI270732v1_random +chr22_KI270733v1_random +chr22_KI270734v1_random +chr22_KI270735v1_random +chr22_KI270736v1_random +chr22_KI270737v1_random +chr22_KI270738v1_random +chr22_KI270739v1_random +chrY_KI270740v1_random +chrUn_KI270302v1 +chrUn_KI270304v1 +chrUn_KI270303v1 +chrUn_KI270305v1 +chrUn_KI270322v1 +chrUn_KI270320v1 +chrUn_KI270310v1 +chrUn_KI270316v1 +chrUn_KI270315v1 +chrUn_KI270312v1 +chrUn_KI270311v1 +chrUn_KI270317v1 +chrUn_KI270412v1 +chrUn_KI270411v1 +chrUn_KI270414v1 +chrUn_KI270419v1 +chrUn_KI270418v1 +chrUn_KI270420v1 +chrUn_KI270424v1 +chrUn_KI270417v1 +chrUn_KI270422v1 +chrUn_KI270423v1 +chrUn_KI270425v1 +chrUn_KI270429v1 +chrUn_KI270442v1 +chrUn_KI270466v1 +chrUn_KI270465v1 +chrUn_KI270467v1 +chrUn_KI270435v1 +chrUn_KI270438v1 +chrUn_KI270468v1 +chrUn_KI270510v1 +chrUn_KI270509v1 +chrUn_KI270518v1 +chrUn_KI270508v1 +chrUn_KI270516v1 +chrUn_KI270512v1 +chrUn_KI270519v1 +chrUn_KI270522v1 +chrUn_KI270511v1 +chrUn_KI270515v1 +chrUn_KI270507v1 +chrUn_KI270517v1 +chrUn_KI270529v1 +chrUn_KI270528v1 +chrUn_KI270530v1 +chrUn_KI270539v1 +chrUn_KI270538v1 +chrUn_KI270544v1 +chrUn_KI270548v1 +chrUn_KI270583v1 +chrUn_KI270587v1 +chrUn_KI270580v1 +chrUn_KI270581v1 +chrUn_KI270579v1 +chrUn_KI270589v1 +chrUn_KI270590v1 +chrUn_KI270584v1 +chrUn_KI270582v1 +chrUn_KI270588v1 
+chrUn_KI270593v1 +chrUn_KI270591v1 +chrUn_KI270330v1 +chrUn_KI270329v1 +chrUn_KI270334v1 +chrUn_KI270333v1 +chrUn_KI270335v1 +chrUn_KI270338v1 +chrUn_KI270340v1 +chrUn_KI270336v1 +chrUn_KI270337v1 +chrUn_KI270363v1 +chrUn_KI270364v1 +chrUn_KI270362v1 +chrUn_KI270366v1 +chrUn_KI270378v1 +chrUn_KI270379v1 +chrUn_KI270389v1 +chrUn_KI270390v1 +chrUn_KI270387v1 +chrUn_KI270395v1 +chrUn_KI270396v1 +chrUn_KI270388v1 +chrUn_KI270394v1 +chrUn_KI270386v1 +chrUn_KI270391v1 +chrUn_KI270383v1 +chrUn_KI270393v1 +chrUn_KI270384v1 +chrUn_KI270392v1 +chrUn_KI270381v1 +chrUn_KI270385v1 +chrUn_KI270382v1 +chrUn_KI270376v1 +chrUn_KI270374v1 +chrUn_KI270372v1 +chrUn_KI270373v1 +chrUn_KI270375v1 +chrUn_KI270371v1 +chrUn_KI270448v1 +chrUn_KI270521v1 +chrUn_GL000195v1 +chrUn_GL000219v1 +chrUn_GL000220v1 +chrUn_GL000224v1 +chrUn_KI270741v1 +chrUn_GL000226v1 +chrUn_GL000213v1 +chrUn_KI270743v1 +chrUn_KI270744v1 +chrUn_KI270745v1 +chrUn_KI270746v1 +chrUn_KI270747v1 +chrUn_KI270748v1 +chrUn_KI270749v1 +chrUn_KI270750v1 +chrUn_KI270751v1 +chrUn_KI270752v1 +chrUn_KI270753v1 +chrUn_KI270754v1 +chrUn_KI270755v1 +chrUn_KI270756v1 +chrUn_KI270757v1 +chrUn_GL000214v1 +chrUn_KI270742v1 +chrUn_GL000216v2 +chrUn_GL000218v1 +chr1_KI270762v1_alt +chr1_KI270766v1_alt +chr1_KI270760v1_alt +chr1_KI270765v1_alt +chr1_GL383518v1_alt +chr1_GL383519v1_alt +chr1_GL383520v2_alt +chr1_KI270764v1_alt +chr1_KI270763v1_alt +chr1_KI270759v1_alt +chr1_KI270761v1_alt +chr2_KI270770v1_alt +chr2_KI270773v1_alt +chr2_KI270774v1_alt +chr2_KI270769v1_alt +chr2_GL383521v1_alt +chr2_KI270772v1_alt +chr2_KI270775v1_alt +chr2_KI270771v1_alt +chr2_KI270768v1_alt +chr2_GL582966v2_alt +chr2_GL383522v1_alt +chr2_KI270776v1_alt +chr2_KI270767v1_alt +chr3_JH636055v2_alt +chr3_KI270783v1_alt +chr3_KI270780v1_alt +chr3_GL383526v1_alt +chr3_KI270777v1_alt +chr3_KI270778v1_alt +chr3_KI270781v1_alt +chr3_KI270779v1_alt +chr3_KI270782v1_alt +chr3_KI270784v1_alt +chr4_KI270790v1_alt +chr4_GL383528v1_alt +chr4_KI270787v1_alt 
+chr4_GL000257v2_alt +chr4_KI270788v1_alt +chr4_GL383527v1_alt +chr4_KI270785v1_alt +chr4_KI270789v1_alt +chr4_KI270786v1_alt +chr5_KI270793v1_alt +chr5_KI270792v1_alt +chr5_KI270791v1_alt +chr5_GL383532v1_alt +chr5_GL949742v1_alt +chr5_KI270794v1_alt +chr5_GL339449v2_alt +chr5_GL383530v1_alt +chr5_KI270796v1_alt +chr5_GL383531v1_alt +chr5_KI270795v1_alt +chr6_GL000250v2_alt +chr6_KI270800v1_alt +chr6_KI270799v1_alt +chr6_GL383533v1_alt +chr6_KI270801v1_alt +chr6_KI270802v1_alt +chr6_KB021644v2_alt +chr6_KI270797v1_alt +chr6_KI270798v1_alt +chr7_KI270804v1_alt +chr7_KI270809v1_alt +chr7_KI270806v1_alt +chr7_GL383534v2_alt +chr7_KI270803v1_alt +chr7_KI270808v1_alt +chr7_KI270807v1_alt +chr7_KI270805v1_alt +chr8_KI270818v1_alt +chr8_KI270812v1_alt +chr8_KI270811v1_alt +chr8_KI270821v1_alt +chr8_KI270813v1_alt +chr8_KI270822v1_alt +chr8_KI270814v1_alt +chr8_KI270810v1_alt +chr8_KI270819v1_alt +chr8_KI270820v1_alt +chr8_KI270817v1_alt +chr8_KI270816v1_alt +chr8_KI270815v1_alt +chr9_GL383539v1_alt +chr9_GL383540v1_alt +chr9_GL383541v1_alt +chr9_GL383542v1_alt +chr9_KI270823v1_alt +chr10_GL383545v1_alt +chr10_KI270824v1_alt +chr10_GL383546v1_alt +chr10_KI270825v1_alt +chr11_KI270832v1_alt +chr11_KI270830v1_alt +chr11_KI270831v1_alt +chr11_KI270829v1_alt +chr11_GL383547v1_alt +chr11_JH159136v1_alt +chr11_JH159137v1_alt +chr11_KI270827v1_alt +chr11_KI270826v1_alt +chr12_GL877875v1_alt +chr12_GL877876v1_alt +chr12_KI270837v1_alt +chr12_GL383549v1_alt +chr12_KI270835v1_alt +chr12_GL383550v2_alt +chr12_GL383552v1_alt +chr12_GL383553v2_alt +chr12_KI270834v1_alt +chr12_GL383551v1_alt +chr12_KI270833v1_alt +chr12_KI270836v1_alt +chr13_KI270840v1_alt +chr13_KI270839v1_alt +chr13_KI270843v1_alt +chr13_KI270841v1_alt +chr13_KI270838v1_alt +chr13_KI270842v1_alt +chr14_KI270844v1_alt +chr14_KI270847v1_alt +chr14_KI270845v1_alt +chr14_KI270846v1_alt +chr15_KI270852v1_alt +chr15_KI270851v1_alt +chr15_KI270848v1_alt +chr15_GL383554v1_alt +chr15_KI270849v1_alt +chr15_GL383555v2_alt 
+chr15_KI270850v1_alt +chr16_KI270854v1_alt +chr16_KI270856v1_alt +chr16_KI270855v1_alt +chr16_KI270853v1_alt +chr16_GL383556v1_alt +chr16_GL383557v1_alt +chr17_GL383563v3_alt +chr17_KI270862v1_alt +chr17_KI270861v1_alt +chr17_KI270857v1_alt +chr17_JH159146v1_alt +chr17_JH159147v1_alt +chr17_GL383564v2_alt +chr17_GL000258v2_alt +chr17_GL383565v1_alt +chr17_KI270858v1_alt +chr17_KI270859v1_alt +chr17_GL383566v1_alt +chr17_KI270860v1_alt +chr18_KI270864v1_alt +chr18_GL383567v1_alt +chr18_GL383570v1_alt +chr18_GL383571v1_alt +chr18_GL383568v1_alt +chr18_GL383569v1_alt +chr18_GL383572v1_alt +chr18_KI270863v1_alt +chr19_KI270868v1_alt +chr19_KI270865v1_alt +chr19_GL383573v1_alt +chr19_GL383575v2_alt +chr19_GL383576v1_alt +chr19_GL383574v1_alt +chr19_KI270866v1_alt +chr19_KI270867v1_alt +chr19_GL949746v1_alt +chr20_GL383577v2_alt +chr20_KI270869v1_alt +chr20_KI270871v1_alt +chr20_KI270870v1_alt +chr21_GL383578v2_alt +chr21_KI270874v1_alt +chr21_KI270873v1_alt +chr21_GL383579v2_alt +chr21_GL383580v2_alt +chr21_GL383581v2_alt +chr21_KI270872v1_alt +chr22_KI270875v1_alt +chr22_KI270878v1_alt +chr22_KI270879v1_alt +chr22_KI270876v1_alt +chr22_KI270877v1_alt +chr22_GL383583v2_alt +chr22_GL383582v2_alt +chrX_KI270880v1_alt +chrX_KI270881v1_alt +chr19_KI270882v1_alt +chr19_KI270883v1_alt +chr19_KI270884v1_alt +chr19_KI270885v1_alt +chr19_KI270886v1_alt +chr19_KI270887v1_alt +chr19_KI270888v1_alt +chr19_KI270889v1_alt +chr19_KI270890v1_alt +chr19_KI270891v1_alt +chr1_KI270892v1_alt +chr2_KI270894v1_alt +chr2_KI270893v1_alt +chr3_KI270895v1_alt +chr4_KI270896v1_alt +chr5_KI270897v1_alt +chr5_KI270898v1_alt +chr6_GL000251v2_alt +chr7_KI270899v1_alt +chr8_KI270901v1_alt +chr8_KI270900v1_alt +chr11_KI270902v1_alt +chr11_KI270903v1_alt +chr12_KI270904v1_alt +chr15_KI270906v1_alt +chr15_KI270905v1_alt +chr17_KI270907v1_alt +chr17_KI270910v1_alt +chr17_KI270909v1_alt +chr17_JH159148v1_alt +chr17_KI270908v1_alt +chr18_KI270912v1_alt +chr18_KI270911v1_alt +chr19_GL949747v2_alt 
+chr22_KB663609v1_alt +chrX_KI270913v1_alt +chr19_KI270914v1_alt +chr19_KI270915v1_alt +chr19_KI270916v1_alt +chr19_KI270917v1_alt +chr19_KI270918v1_alt +chr19_KI270919v1_alt +chr19_KI270920v1_alt +chr19_KI270921v1_alt +chr19_KI270922v1_alt +chr19_KI270923v1_alt +chr3_KI270924v1_alt +chr4_KI270925v1_alt +chr6_GL000252v2_alt +chr8_KI270926v1_alt +chr11_KI270927v1_alt +chr19_GL949748v2_alt +chr22_KI270928v1_alt +chr19_KI270929v1_alt +chr19_KI270930v1_alt +chr19_KI270931v1_alt +chr19_KI270932v1_alt +chr19_KI270933v1_alt +chr19_GL000209v2_alt +chr3_KI270934v1_alt +chr6_GL000253v2_alt +chr19_GL949749v2_alt +chr3_KI270935v1_alt +chr6_GL000254v2_alt +chr19_GL949750v2_alt +chr3_KI270936v1_alt +chr6_GL000255v2_alt +chr19_GL949751v2_alt +chr3_KI270937v1_alt +chr6_GL000256v2_alt +chr19_GL949752v1_alt +chr6_KI270758v1_alt +chr19_GL949753v2_alt +chr19_KI270938v1_alt +chrEBV +chrUn_KN707606v1_decoy +chrUn_KN707607v1_decoy +chrUn_KN707608v1_decoy +chrUn_KN707609v1_decoy +chrUn_KN707610v1_decoy +chrUn_KN707611v1_decoy +chrUn_KN707612v1_decoy +chrUn_KN707613v1_decoy +chrUn_KN707614v1_decoy +chrUn_KN707615v1_decoy +chrUn_KN707616v1_decoy +chrUn_KN707617v1_decoy +chrUn_KN707618v1_decoy +chrUn_KN707619v1_decoy +chrUn_KN707620v1_decoy +chrUn_KN707621v1_decoy +chrUn_KN707622v1_decoy +chrUn_KN707623v1_decoy +chrUn_KN707624v1_decoy +chrUn_KN707625v1_decoy +chrUn_KN707626v1_decoy +chrUn_KN707627v1_decoy +chrUn_KN707628v1_decoy +chrUn_KN707629v1_decoy +chrUn_KN707630v1_decoy +chrUn_KN707631v1_decoy +chrUn_KN707632v1_decoy +chrUn_KN707633v1_decoy +chrUn_KN707634v1_decoy +chrUn_KN707635v1_decoy +chrUn_KN707636v1_decoy +chrUn_KN707637v1_decoy +chrUn_KN707638v1_decoy +chrUn_KN707639v1_decoy +chrUn_KN707640v1_decoy +chrUn_KN707641v1_decoy +chrUn_KN707642v1_decoy +chrUn_KN707643v1_decoy +chrUn_KN707644v1_decoy +chrUn_KN707645v1_decoy +chrUn_KN707646v1_decoy +chrUn_KN707647v1_decoy +chrUn_KN707648v1_decoy +chrUn_KN707649v1_decoy +chrUn_KN707650v1_decoy +chrUn_KN707651v1_decoy 
+chrUn_KN707652v1_decoy +chrUn_KN707653v1_decoy +chrUn_KN707654v1_decoy +chrUn_KN707655v1_decoy +chrUn_KN707656v1_decoy +chrUn_KN707657v1_decoy +chrUn_KN707658v1_decoy +chrUn_KN707659v1_decoy +chrUn_KN707660v1_decoy +chrUn_KN707661v1_decoy +chrUn_KN707662v1_decoy +chrUn_KN707663v1_decoy +chrUn_KN707664v1_decoy +chrUn_KN707665v1_decoy +chrUn_KN707666v1_decoy +chrUn_KN707667v1_decoy +chrUn_KN707668v1_decoy +chrUn_KN707669v1_decoy +chrUn_KN707670v1_decoy +chrUn_KN707671v1_decoy +chrUn_KN707672v1_decoy +chrUn_KN707673v1_decoy +chrUn_KN707674v1_decoy +chrUn_KN707675v1_decoy +chrUn_KN707676v1_decoy +chrUn_KN707677v1_decoy +chrUn_KN707678v1_decoy +chrUn_KN707679v1_decoy +chrUn_KN707680v1_decoy +chrUn_KN707681v1_decoy +chrUn_KN707682v1_decoy +chrUn_KN707683v1_decoy +chrUn_KN707684v1_decoy +chrUn_KN707685v1_decoy +chrUn_KN707686v1_decoy +chrUn_KN707687v1_decoy +chrUn_KN707688v1_decoy +chrUn_KN707689v1_decoy +chrUn_KN707690v1_decoy +chrUn_KN707691v1_decoy +chrUn_KN707692v1_decoy +chrUn_KN707693v1_decoy +chrUn_KN707694v1_decoy +chrUn_KN707695v1_decoy +chrUn_KN707696v1_decoy +chrUn_KN707697v1_decoy +chrUn_KN707698v1_decoy +chrUn_KN707699v1_decoy +chrUn_KN707700v1_decoy +chrUn_KN707701v1_decoy +chrUn_KN707702v1_decoy +chrUn_KN707703v1_decoy +chrUn_KN707704v1_decoy +chrUn_KN707705v1_decoy +chrUn_KN707706v1_decoy +chrUn_KN707707v1_decoy +chrUn_KN707708v1_decoy +chrUn_KN707709v1_decoy +chrUn_KN707710v1_decoy +chrUn_KN707711v1_decoy +chrUn_KN707712v1_decoy +chrUn_KN707713v1_decoy +chrUn_KN707714v1_decoy +chrUn_KN707715v1_decoy +chrUn_KN707716v1_decoy +chrUn_KN707717v1_decoy +chrUn_KN707718v1_decoy +chrUn_KN707719v1_decoy +chrUn_KN707720v1_decoy +chrUn_KN707721v1_decoy +chrUn_KN707722v1_decoy +chrUn_KN707723v1_decoy +chrUn_KN707724v1_decoy +chrUn_KN707725v1_decoy +chrUn_KN707726v1_decoy +chrUn_KN707727v1_decoy +chrUn_KN707728v1_decoy +chrUn_KN707729v1_decoy +chrUn_KN707730v1_decoy +chrUn_KN707731v1_decoy +chrUn_KN707732v1_decoy +chrUn_KN707733v1_decoy +chrUn_KN707734v1_decoy 
+chrUn_KN707735v1_decoy +chrUn_KN707736v1_decoy +chrUn_KN707737v1_decoy +chrUn_KN707738v1_decoy +chrUn_KN707739v1_decoy +chrUn_KN707740v1_decoy +chrUn_KN707741v1_decoy +chrUn_KN707742v1_decoy +chrUn_KN707743v1_decoy +chrUn_KN707744v1_decoy +chrUn_KN707745v1_decoy +chrUn_KN707746v1_decoy +chrUn_KN707747v1_decoy +chrUn_KN707748v1_decoy +chrUn_KN707749v1_decoy +chrUn_KN707750v1_decoy +chrUn_KN707751v1_decoy +chrUn_KN707752v1_decoy +chrUn_KN707753v1_decoy +chrUn_KN707754v1_decoy +chrUn_KN707755v1_decoy +chrUn_KN707756v1_decoy +chrUn_KN707757v1_decoy +chrUn_KN707758v1_decoy +chrUn_KN707759v1_decoy +chrUn_KN707760v1_decoy +chrUn_KN707761v1_decoy +chrUn_KN707762v1_decoy +chrUn_KN707763v1_decoy +chrUn_KN707764v1_decoy +chrUn_KN707765v1_decoy +chrUn_KN707766v1_decoy +chrUn_KN707767v1_decoy +chrUn_KN707768v1_decoy +chrUn_KN707769v1_decoy +chrUn_KN707770v1_decoy +chrUn_KN707771v1_decoy +chrUn_KN707772v1_decoy +chrUn_KN707773v1_decoy +chrUn_KN707774v1_decoy +chrUn_KN707775v1_decoy +chrUn_KN707776v1_decoy +chrUn_KN707777v1_decoy +chrUn_KN707778v1_decoy +chrUn_KN707779v1_decoy +chrUn_KN707780v1_decoy +chrUn_KN707781v1_decoy +chrUn_KN707782v1_decoy +chrUn_KN707783v1_decoy +chrUn_KN707784v1_decoy +chrUn_KN707785v1_decoy +chrUn_KN707786v1_decoy +chrUn_KN707787v1_decoy +chrUn_KN707788v1_decoy +chrUn_KN707789v1_decoy +chrUn_KN707790v1_decoy +chrUn_KN707791v1_decoy +chrUn_KN707792v1_decoy +chrUn_KN707793v1_decoy +chrUn_KN707794v1_decoy +chrUn_KN707795v1_decoy +chrUn_KN707796v1_decoy +chrUn_KN707797v1_decoy +chrUn_KN707798v1_decoy +chrUn_KN707799v1_decoy +chrUn_KN707800v1_decoy +chrUn_KN707801v1_decoy +chrUn_KN707802v1_decoy +chrUn_KN707803v1_decoy +chrUn_KN707804v1_decoy +chrUn_KN707805v1_decoy +chrUn_KN707806v1_decoy +chrUn_KN707807v1_decoy +chrUn_KN707808v1_decoy +chrUn_KN707809v1_decoy +chrUn_KN707810v1_decoy +chrUn_KN707811v1_decoy +chrUn_KN707812v1_decoy +chrUn_KN707813v1_decoy +chrUn_KN707814v1_decoy +chrUn_KN707815v1_decoy +chrUn_KN707816v1_decoy +chrUn_KN707817v1_decoy 
+chrUn_KN707818v1_decoy +chrUn_KN707819v1_decoy +chrUn_KN707820v1_decoy +chrUn_KN707821v1_decoy +chrUn_KN707822v1_decoy +chrUn_KN707823v1_decoy +chrUn_KN707824v1_decoy +chrUn_KN707825v1_decoy +chrUn_KN707826v1_decoy +chrUn_KN707827v1_decoy +chrUn_KN707828v1_decoy +chrUn_KN707829v1_decoy +chrUn_KN707830v1_decoy +chrUn_KN707831v1_decoy +chrUn_KN707832v1_decoy +chrUn_KN707833v1_decoy +chrUn_KN707834v1_decoy +chrUn_KN707835v1_decoy +chrUn_KN707836v1_decoy +chrUn_KN707837v1_decoy +chrUn_KN707838v1_decoy +chrUn_KN707839v1_decoy +chrUn_KN707840v1_decoy +chrUn_KN707841v1_decoy +chrUn_KN707842v1_decoy +chrUn_KN707843v1_decoy +chrUn_KN707844v1_decoy +chrUn_KN707845v1_decoy +chrUn_KN707846v1_decoy +chrUn_KN707847v1_decoy +chrUn_KN707848v1_decoy +chrUn_KN707849v1_decoy +chrUn_KN707850v1_decoy +chrUn_KN707851v1_decoy +chrUn_KN707852v1_decoy +chrUn_KN707853v1_decoy +chrUn_KN707854v1_decoy +chrUn_KN707855v1_decoy +chrUn_KN707856v1_decoy +chrUn_KN707857v1_decoy +chrUn_KN707858v1_decoy +chrUn_KN707859v1_decoy +chrUn_KN707860v1_decoy +chrUn_KN707861v1_decoy +chrUn_KN707862v1_decoy +chrUn_KN707863v1_decoy +chrUn_KN707864v1_decoy +chrUn_KN707865v1_decoy +chrUn_KN707866v1_decoy +chrUn_KN707867v1_decoy +chrUn_KN707868v1_decoy +chrUn_KN707869v1_decoy +chrUn_KN707870v1_decoy +chrUn_KN707871v1_decoy +chrUn_KN707872v1_decoy +chrUn_KN707873v1_decoy +chrUn_KN707874v1_decoy +chrUn_KN707875v1_decoy +chrUn_KN707876v1_decoy +chrUn_KN707877v1_decoy +chrUn_KN707878v1_decoy +chrUn_KN707879v1_decoy +chrUn_KN707880v1_decoy +chrUn_KN707881v1_decoy +chrUn_KN707882v1_decoy +chrUn_KN707883v1_decoy +chrUn_KN707884v1_decoy +chrUn_KN707885v1_decoy +chrUn_KN707886v1_decoy +chrUn_KN707887v1_decoy +chrUn_KN707888v1_decoy +chrUn_KN707889v1_decoy +chrUn_KN707890v1_decoy +chrUn_KN707891v1_decoy +chrUn_KN707892v1_decoy +chrUn_KN707893v1_decoy +chrUn_KN707894v1_decoy +chrUn_KN707895v1_decoy +chrUn_KN707896v1_decoy +chrUn_KN707897v1_decoy +chrUn_KN707898v1_decoy +chrUn_KN707899v1_decoy +chrUn_KN707900v1_decoy 
+chrUn_KN707901v1_decoy +chrUn_KN707902v1_decoy +chrUn_KN707903v1_decoy +chrUn_KN707904v1_decoy +chrUn_KN707905v1_decoy +chrUn_KN707906v1_decoy +chrUn_KN707907v1_decoy +chrUn_KN707908v1_decoy +chrUn_KN707909v1_decoy +chrUn_KN707910v1_decoy +chrUn_KN707911v1_decoy +chrUn_KN707912v1_decoy +chrUn_KN707913v1_decoy +chrUn_KN707914v1_decoy +chrUn_KN707915v1_decoy +chrUn_KN707916v1_decoy +chrUn_KN707917v1_decoy +chrUn_KN707918v1_decoy +chrUn_KN707919v1_decoy +chrUn_KN707920v1_decoy +chrUn_KN707921v1_decoy +chrUn_KN707922v1_decoy +chrUn_KN707923v1_decoy +chrUn_KN707924v1_decoy +chrUn_KN707925v1_decoy +chrUn_KN707926v1_decoy +chrUn_KN707927v1_decoy +chrUn_KN707928v1_decoy +chrUn_KN707929v1_decoy +chrUn_KN707930v1_decoy +chrUn_KN707931v1_decoy +chrUn_KN707932v1_decoy +chrUn_KN707933v1_decoy +chrUn_KN707934v1_decoy +chrUn_KN707935v1_decoy +chrUn_KN707936v1_decoy +chrUn_KN707937v1_decoy +chrUn_KN707938v1_decoy +chrUn_KN707939v1_decoy +chrUn_KN707940v1_decoy +chrUn_KN707941v1_decoy +chrUn_KN707942v1_decoy +chrUn_KN707943v1_decoy +chrUn_KN707944v1_decoy +chrUn_KN707945v1_decoy +chrUn_KN707946v1_decoy +chrUn_KN707947v1_decoy +chrUn_KN707948v1_decoy +chrUn_KN707949v1_decoy +chrUn_KN707950v1_decoy +chrUn_KN707951v1_decoy +chrUn_KN707952v1_decoy +chrUn_KN707953v1_decoy +chrUn_KN707954v1_decoy +chrUn_KN707955v1_decoy +chrUn_KN707956v1_decoy +chrUn_KN707957v1_decoy +chrUn_KN707958v1_decoy +chrUn_KN707959v1_decoy +chrUn_KN707960v1_decoy +chrUn_KN707961v1_decoy +chrUn_KN707962v1_decoy +chrUn_KN707963v1_decoy +chrUn_KN707964v1_decoy +chrUn_KN707965v1_decoy +chrUn_KN707966v1_decoy +chrUn_KN707967v1_decoy +chrUn_KN707968v1_decoy +chrUn_KN707969v1_decoy +chrUn_KN707970v1_decoy +chrUn_KN707971v1_decoy +chrUn_KN707972v1_decoy +chrUn_KN707973v1_decoy +chrUn_KN707974v1_decoy +chrUn_KN707975v1_decoy +chrUn_KN707976v1_decoy +chrUn_KN707977v1_decoy +chrUn_KN707978v1_decoy +chrUn_KN707979v1_decoy +chrUn_KN707980v1_decoy +chrUn_KN707981v1_decoy +chrUn_KN707982v1_decoy +chrUn_KN707983v1_decoy 
+chrUn_KN707984v1_decoy +chrUn_KN707985v1_decoy +chrUn_KN707986v1_decoy +chrUn_KN707987v1_decoy +chrUn_KN707988v1_decoy +chrUn_KN707989v1_decoy +chrUn_KN707990v1_decoy +chrUn_KN707991v1_decoy +chrUn_KN707992v1_decoy +chrUn_JTFH01000001v1_decoy +chrUn_JTFH01000002v1_decoy +chrUn_JTFH01000003v1_decoy +chrUn_JTFH01000004v1_decoy +chrUn_JTFH01000005v1_decoy +chrUn_JTFH01000006v1_decoy +chrUn_JTFH01000007v1_decoy +chrUn_JTFH01000008v1_decoy +chrUn_JTFH01000009v1_decoy +chrUn_JTFH01000010v1_decoy +chrUn_JTFH01000011v1_decoy +chrUn_JTFH01000012v1_decoy +chrUn_JTFH01000013v1_decoy +chrUn_JTFH01000014v1_decoy +chrUn_JTFH01000015v1_decoy +chrUn_JTFH01000016v1_decoy +chrUn_JTFH01000017v1_decoy +chrUn_JTFH01000018v1_decoy +chrUn_JTFH01000019v1_decoy +chrUn_JTFH01000020v1_decoy +chrUn_JTFH01000021v1_decoy +chrUn_JTFH01000022v1_decoy +chrUn_JTFH01000023v1_decoy +chrUn_JTFH01000024v1_decoy +chrUn_JTFH01000025v1_decoy +chrUn_JTFH01000026v1_decoy +chrUn_JTFH01000027v1_decoy +chrUn_JTFH01000028v1_decoy +chrUn_JTFH01000029v1_decoy +chrUn_JTFH01000030v1_decoy +chrUn_JTFH01000031v1_decoy +chrUn_JTFH01000032v1_decoy +chrUn_JTFH01000033v1_decoy +chrUn_JTFH01000034v1_decoy +chrUn_JTFH01000035v1_decoy +chrUn_JTFH01000036v1_decoy +chrUn_JTFH01000037v1_decoy +chrUn_JTFH01000038v1_decoy +chrUn_JTFH01000039v1_decoy +chrUn_JTFH01000040v1_decoy +chrUn_JTFH01000041v1_decoy +chrUn_JTFH01000042v1_decoy +chrUn_JTFH01000043v1_decoy +chrUn_JTFH01000044v1_decoy +chrUn_JTFH01000045v1_decoy +chrUn_JTFH01000046v1_decoy +chrUn_JTFH01000047v1_decoy +chrUn_JTFH01000048v1_decoy +chrUn_JTFH01000049v1_decoy +chrUn_JTFH01000050v1_decoy +chrUn_JTFH01000051v1_decoy +chrUn_JTFH01000052v1_decoy +chrUn_JTFH01000053v1_decoy +chrUn_JTFH01000054v1_decoy +chrUn_JTFH01000055v1_decoy +chrUn_JTFH01000056v1_decoy +chrUn_JTFH01000057v1_decoy +chrUn_JTFH01000058v1_decoy +chrUn_JTFH01000059v1_decoy +chrUn_JTFH01000060v1_decoy +chrUn_JTFH01000061v1_decoy +chrUn_JTFH01000062v1_decoy +chrUn_JTFH01000063v1_decoy 
+chrUn_JTFH01000064v1_decoy +chrUn_JTFH01000065v1_decoy +chrUn_JTFH01000066v1_decoy +chrUn_JTFH01000067v1_decoy +chrUn_JTFH01000068v1_decoy +chrUn_JTFH01000069v1_decoy +chrUn_JTFH01000070v1_decoy +chrUn_JTFH01000071v1_decoy +chrUn_JTFH01000072v1_decoy +chrUn_JTFH01000073v1_decoy +chrUn_JTFH01000074v1_decoy +chrUn_JTFH01000075v1_decoy +chrUn_JTFH01000076v1_decoy +chrUn_JTFH01000077v1_decoy +chrUn_JTFH01000078v1_decoy +chrUn_JTFH01000079v1_decoy +chrUn_JTFH01000080v1_decoy +chrUn_JTFH01000081v1_decoy +chrUn_JTFH01000082v1_decoy +chrUn_JTFH01000083v1_decoy +chrUn_JTFH01000084v1_decoy +chrUn_JTFH01000085v1_decoy +chrUn_JTFH01000086v1_decoy +chrUn_JTFH01000087v1_decoy +chrUn_JTFH01000088v1_decoy +chrUn_JTFH01000089v1_decoy +chrUn_JTFH01000090v1_decoy +chrUn_JTFH01000091v1_decoy +chrUn_JTFH01000092v1_decoy +chrUn_JTFH01000093v1_decoy +chrUn_JTFH01000094v1_decoy +chrUn_JTFH01000095v1_decoy +chrUn_JTFH01000096v1_decoy +chrUn_JTFH01000097v1_decoy +chrUn_JTFH01000098v1_decoy +chrUn_JTFH01000099v1_decoy +chrUn_JTFH01000100v1_decoy +chrUn_JTFH01000101v1_decoy +chrUn_JTFH01000102v1_decoy +chrUn_JTFH01000103v1_decoy +chrUn_JTFH01000104v1_decoy +chrUn_JTFH01000105v1_decoy +chrUn_JTFH01000106v1_decoy +chrUn_JTFH01000107v1_decoy +chrUn_JTFH01000108v1_decoy +chrUn_JTFH01000109v1_decoy +chrUn_JTFH01000110v1_decoy +chrUn_JTFH01000111v1_decoy +chrUn_JTFH01000112v1_decoy +chrUn_JTFH01000113v1_decoy +chrUn_JTFH01000114v1_decoy +chrUn_JTFH01000115v1_decoy +chrUn_JTFH01000116v1_decoy +chrUn_JTFH01000117v1_decoy +chrUn_JTFH01000118v1_decoy +chrUn_JTFH01000119v1_decoy +chrUn_JTFH01000120v1_decoy +chrUn_JTFH01000121v1_decoy +chrUn_JTFH01000122v1_decoy +chrUn_JTFH01000123v1_decoy +chrUn_JTFH01000124v1_decoy +chrUn_JTFH01000125v1_decoy +chrUn_JTFH01000126v1_decoy +chrUn_JTFH01000127v1_decoy +chrUn_JTFH01000128v1_decoy +chrUn_JTFH01000129v1_decoy +chrUn_JTFH01000130v1_decoy +chrUn_JTFH01000131v1_decoy +chrUn_JTFH01000132v1_decoy +chrUn_JTFH01000133v1_decoy +chrUn_JTFH01000134v1_decoy 
+chrUn_JTFH01000135v1_decoy +chrUn_JTFH01000136v1_decoy +chrUn_JTFH01000137v1_decoy +chrUn_JTFH01000138v1_decoy +chrUn_JTFH01000139v1_decoy +chrUn_JTFH01000140v1_decoy +chrUn_JTFH01000141v1_decoy +chrUn_JTFH01000142v1_decoy +chrUn_JTFH01000143v1_decoy +chrUn_JTFH01000144v1_decoy +chrUn_JTFH01000145v1_decoy +chrUn_JTFH01000146v1_decoy +chrUn_JTFH01000147v1_decoy +chrUn_JTFH01000148v1_decoy +chrUn_JTFH01000149v1_decoy +chrUn_JTFH01000150v1_decoy +chrUn_JTFH01000151v1_decoy +chrUn_JTFH01000152v1_decoy +chrUn_JTFH01000153v1_decoy +chrUn_JTFH01000154v1_decoy +chrUn_JTFH01000155v1_decoy +chrUn_JTFH01000156v1_decoy +chrUn_JTFH01000157v1_decoy +chrUn_JTFH01000158v1_decoy +chrUn_JTFH01000159v1_decoy +chrUn_JTFH01000160v1_decoy +chrUn_JTFH01000161v1_decoy +chrUn_JTFH01000162v1_decoy +chrUn_JTFH01000163v1_decoy +chrUn_JTFH01000164v1_decoy +chrUn_JTFH01000165v1_decoy +chrUn_JTFH01000166v1_decoy +chrUn_JTFH01000167v1_decoy +chrUn_JTFH01000168v1_decoy +chrUn_JTFH01000169v1_decoy +chrUn_JTFH01000170v1_decoy +chrUn_JTFH01000171v1_decoy +chrUn_JTFH01000172v1_decoy +chrUn_JTFH01000173v1_decoy +chrUn_JTFH01000174v1_decoy +chrUn_JTFH01000175v1_decoy +chrUn_JTFH01000176v1_decoy +chrUn_JTFH01000177v1_decoy +chrUn_JTFH01000178v1_decoy +chrUn_JTFH01000179v1_decoy +chrUn_JTFH01000180v1_decoy +chrUn_JTFH01000181v1_decoy +chrUn_JTFH01000182v1_decoy +chrUn_JTFH01000183v1_decoy +chrUn_JTFH01000184v1_decoy +chrUn_JTFH01000185v1_decoy +chrUn_JTFH01000186v1_decoy +chrUn_JTFH01000187v1_decoy +chrUn_JTFH01000188v1_decoy +chrUn_JTFH01000189v1_decoy +chrUn_JTFH01000190v1_decoy +chrUn_JTFH01000191v1_decoy +chrUn_JTFH01000192v1_decoy +chrUn_JTFH01000193v1_decoy +chrUn_JTFH01000194v1_decoy +chrUn_JTFH01000195v1_decoy +chrUn_JTFH01000196v1_decoy +chrUn_JTFH01000197v1_decoy +chrUn_JTFH01000198v1_decoy +chrUn_JTFH01000199v1_decoy +chrUn_JTFH01000200v1_decoy +chrUn_JTFH01000201v1_decoy +chrUn_JTFH01000202v1_decoy +chrUn_JTFH01000203v1_decoy +chrUn_JTFH01000204v1_decoy +chrUn_JTFH01000205v1_decoy 
+chrUn_JTFH01000206v1_decoy +chrUn_JTFH01000207v1_decoy +chrUn_JTFH01000208v1_decoy +chrUn_JTFH01000209v1_decoy +chrUn_JTFH01000210v1_decoy +chrUn_JTFH01000211v1_decoy +chrUn_JTFH01000212v1_decoy +chrUn_JTFH01000213v1_decoy +chrUn_JTFH01000214v1_decoy +chrUn_JTFH01000215v1_decoy +chrUn_JTFH01000216v1_decoy +chrUn_JTFH01000217v1_decoy +chrUn_JTFH01000218v1_decoy +chrUn_JTFH01000219v1_decoy +chrUn_JTFH01000220v1_decoy +chrUn_JTFH01000221v1_decoy +chrUn_JTFH01000222v1_decoy +chrUn_JTFH01000223v1_decoy +chrUn_JTFH01000224v1_decoy +chrUn_JTFH01000225v1_decoy +chrUn_JTFH01000226v1_decoy +chrUn_JTFH01000227v1_decoy +chrUn_JTFH01000228v1_decoy +chrUn_JTFH01000229v1_decoy +chrUn_JTFH01000230v1_decoy +chrUn_JTFH01000231v1_decoy +chrUn_JTFH01000232v1_decoy +chrUn_JTFH01000233v1_decoy +chrUn_JTFH01000234v1_decoy +chrUn_JTFH01000235v1_decoy +chrUn_JTFH01000236v1_decoy +chrUn_JTFH01000237v1_decoy +chrUn_JTFH01000238v1_decoy +chrUn_JTFH01000239v1_decoy +chrUn_JTFH01000240v1_decoy +chrUn_JTFH01000241v1_decoy +chrUn_JTFH01000242v1_decoy +chrUn_JTFH01000243v1_decoy +chrUn_JTFH01000244v1_decoy +chrUn_JTFH01000245v1_decoy +chrUn_JTFH01000246v1_decoy +chrUn_JTFH01000247v1_decoy +chrUn_JTFH01000248v1_decoy +chrUn_JTFH01000249v1_decoy +chrUn_JTFH01000250v1_decoy +chrUn_JTFH01000251v1_decoy +chrUn_JTFH01000252v1_decoy +chrUn_JTFH01000253v1_decoy +chrUn_JTFH01000254v1_decoy +chrUn_JTFH01000255v1_decoy +chrUn_JTFH01000256v1_decoy +chrUn_JTFH01000257v1_decoy +chrUn_JTFH01000258v1_decoy +chrUn_JTFH01000259v1_decoy +chrUn_JTFH01000260v1_decoy +chrUn_JTFH01000261v1_decoy +chrUn_JTFH01000262v1_decoy +chrUn_JTFH01000263v1_decoy +chrUn_JTFH01000264v1_decoy +chrUn_JTFH01000265v1_decoy +chrUn_JTFH01000266v1_decoy +chrUn_JTFH01000267v1_decoy +chrUn_JTFH01000268v1_decoy +chrUn_JTFH01000269v1_decoy +chrUn_JTFH01000270v1_decoy +chrUn_JTFH01000271v1_decoy +chrUn_JTFH01000272v1_decoy +chrUn_JTFH01000273v1_decoy +chrUn_JTFH01000274v1_decoy +chrUn_JTFH01000275v1_decoy +chrUn_JTFH01000276v1_decoy 
+chrUn_JTFH01000277v1_decoy +chrUn_JTFH01000278v1_decoy +chrUn_JTFH01000279v1_decoy +chrUn_JTFH01000280v1_decoy +chrUn_JTFH01000281v1_decoy +chrUn_JTFH01000282v1_decoy +chrUn_JTFH01000283v1_decoy +chrUn_JTFH01000284v1_decoy +chrUn_JTFH01000285v1_decoy +chrUn_JTFH01000286v1_decoy +chrUn_JTFH01000287v1_decoy +chrUn_JTFH01000288v1_decoy +chrUn_JTFH01000289v1_decoy +chrUn_JTFH01000290v1_decoy +chrUn_JTFH01000291v1_decoy +chrUn_JTFH01000292v1_decoy +chrUn_JTFH01000293v1_decoy +chrUn_JTFH01000294v1_decoy +chrUn_JTFH01000295v1_decoy +chrUn_JTFH01000296v1_decoy +chrUn_JTFH01000297v1_decoy +chrUn_JTFH01000298v1_decoy +chrUn_JTFH01000299v1_decoy +chrUn_JTFH01000300v1_decoy +chrUn_JTFH01000301v1_decoy +chrUn_JTFH01000302v1_decoy +chrUn_JTFH01000303v1_decoy +chrUn_JTFH01000304v1_decoy +chrUn_JTFH01000305v1_decoy +chrUn_JTFH01000306v1_decoy +chrUn_JTFH01000307v1_decoy +chrUn_JTFH01000308v1_decoy +chrUn_JTFH01000309v1_decoy +chrUn_JTFH01000310v1_decoy +chrUn_JTFH01000311v1_decoy +chrUn_JTFH01000312v1_decoy +chrUn_JTFH01000313v1_decoy +chrUn_JTFH01000314v1_decoy +chrUn_JTFH01000315v1_decoy +chrUn_JTFH01000316v1_decoy +chrUn_JTFH01000317v1_decoy +chrUn_JTFH01000318v1_decoy +chrUn_JTFH01000319v1_decoy +chrUn_JTFH01000320v1_decoy +chrUn_JTFH01000321v1_decoy +chrUn_JTFH01000322v1_decoy +chrUn_JTFH01000323v1_decoy +chrUn_JTFH01000324v1_decoy +chrUn_JTFH01000325v1_decoy +chrUn_JTFH01000326v1_decoy +chrUn_JTFH01000327v1_decoy +chrUn_JTFH01000328v1_decoy +chrUn_JTFH01000329v1_decoy +chrUn_JTFH01000330v1_decoy +chrUn_JTFH01000331v1_decoy +chrUn_JTFH01000332v1_decoy +chrUn_JTFH01000333v1_decoy +chrUn_JTFH01000334v1_decoy +chrUn_JTFH01000335v1_decoy +chrUn_JTFH01000336v1_decoy +chrUn_JTFH01000337v1_decoy +chrUn_JTFH01000338v1_decoy +chrUn_JTFH01000339v1_decoy +chrUn_JTFH01000340v1_decoy +chrUn_JTFH01000341v1_decoy +chrUn_JTFH01000342v1_decoy +chrUn_JTFH01000343v1_decoy +chrUn_JTFH01000344v1_decoy +chrUn_JTFH01000345v1_decoy +chrUn_JTFH01000346v1_decoy +chrUn_JTFH01000347v1_decoy 
+chrUn_JTFH01000348v1_decoy +chrUn_JTFH01000349v1_decoy +chrUn_JTFH01000350v1_decoy +chrUn_JTFH01000351v1_decoy +chrUn_JTFH01000352v1_decoy +chrUn_JTFH01000353v1_decoy +chrUn_JTFH01000354v1_decoy +chrUn_JTFH01000355v1_decoy +chrUn_JTFH01000356v1_decoy +chrUn_JTFH01000357v1_decoy +chrUn_JTFH01000358v1_decoy +chrUn_JTFH01000359v1_decoy +chrUn_JTFH01000360v1_decoy +chrUn_JTFH01000361v1_decoy +chrUn_JTFH01000362v1_decoy +chrUn_JTFH01000363v1_decoy +chrUn_JTFH01000364v1_decoy +chrUn_JTFH01000365v1_decoy +chrUn_JTFH01000366v1_decoy +chrUn_JTFH01000367v1_decoy +chrUn_JTFH01000368v1_decoy +chrUn_JTFH01000369v1_decoy +chrUn_JTFH01000370v1_decoy +chrUn_JTFH01000371v1_decoy +chrUn_JTFH01000372v1_decoy +chrUn_JTFH01000373v1_decoy +chrUn_JTFH01000374v1_decoy +chrUn_JTFH01000375v1_decoy +chrUn_JTFH01000376v1_decoy +chrUn_JTFH01000377v1_decoy +chrUn_JTFH01000378v1_decoy +chrUn_JTFH01000379v1_decoy +chrUn_JTFH01000380v1_decoy +chrUn_JTFH01000381v1_decoy +chrUn_JTFH01000382v1_decoy +chrUn_JTFH01000383v1_decoy +chrUn_JTFH01000384v1_decoy +chrUn_JTFH01000385v1_decoy +chrUn_JTFH01000386v1_decoy +chrUn_JTFH01000387v1_decoy +chrUn_JTFH01000388v1_decoy +chrUn_JTFH01000389v1_decoy +chrUn_JTFH01000390v1_decoy +chrUn_JTFH01000391v1_decoy +chrUn_JTFH01000392v1_decoy +chrUn_JTFH01000393v1_decoy +chrUn_JTFH01000394v1_decoy +chrUn_JTFH01000395v1_decoy +chrUn_JTFH01000396v1_decoy +chrUn_JTFH01000397v1_decoy +chrUn_JTFH01000398v1_decoy +chrUn_JTFH01000399v1_decoy +chrUn_JTFH01000400v1_decoy +chrUn_JTFH01000401v1_decoy +chrUn_JTFH01000402v1_decoy +chrUn_JTFH01000403v1_decoy +chrUn_JTFH01000404v1_decoy +chrUn_JTFH01000405v1_decoy +chrUn_JTFH01000406v1_decoy +chrUn_JTFH01000407v1_decoy +chrUn_JTFH01000408v1_decoy +chrUn_JTFH01000409v1_decoy +chrUn_JTFH01000410v1_decoy +chrUn_JTFH01000411v1_decoy +chrUn_JTFH01000412v1_decoy +chrUn_JTFH01000413v1_decoy +chrUn_JTFH01000414v1_decoy +chrUn_JTFH01000415v1_decoy +chrUn_JTFH01000416v1_decoy +chrUn_JTFH01000417v1_decoy +chrUn_JTFH01000418v1_decoy 
+chrUn_JTFH01000419v1_decoy +chrUn_JTFH01000420v1_decoy +chrUn_JTFH01000421v1_decoy +chrUn_JTFH01000422v1_decoy +chrUn_JTFH01000423v1_decoy +chrUn_JTFH01000424v1_decoy +chrUn_JTFH01000425v1_decoy +chrUn_JTFH01000426v1_decoy +chrUn_JTFH01000427v1_decoy +chrUn_JTFH01000428v1_decoy +chrUn_JTFH01000429v1_decoy +chrUn_JTFH01000430v1_decoy +chrUn_JTFH01000431v1_decoy +chrUn_JTFH01000432v1_decoy +chrUn_JTFH01000433v1_decoy +chrUn_JTFH01000434v1_decoy +chrUn_JTFH01000435v1_decoy +chrUn_JTFH01000436v1_decoy +chrUn_JTFH01000437v1_decoy +chrUn_JTFH01000438v1_decoy +chrUn_JTFH01000439v1_decoy +chrUn_JTFH01000440v1_decoy +chrUn_JTFH01000441v1_decoy +chrUn_JTFH01000442v1_decoy +chrUn_JTFH01000443v1_decoy +chrUn_JTFH01000444v1_decoy +chrUn_JTFH01000445v1_decoy +chrUn_JTFH01000446v1_decoy +chrUn_JTFH01000447v1_decoy +chrUn_JTFH01000448v1_decoy +chrUn_JTFH01000449v1_decoy +chrUn_JTFH01000450v1_decoy +chrUn_JTFH01000451v1_decoy +chrUn_JTFH01000452v1_decoy +chrUn_JTFH01000453v1_decoy +chrUn_JTFH01000454v1_decoy +chrUn_JTFH01000455v1_decoy +chrUn_JTFH01000456v1_decoy +chrUn_JTFH01000457v1_decoy +chrUn_JTFH01000458v1_decoy +chrUn_JTFH01000459v1_decoy +chrUn_JTFH01000460v1_decoy +chrUn_JTFH01000461v1_decoy +chrUn_JTFH01000462v1_decoy +chrUn_JTFH01000463v1_decoy +chrUn_JTFH01000464v1_decoy +chrUn_JTFH01000465v1_decoy +chrUn_JTFH01000466v1_decoy +chrUn_JTFH01000467v1_decoy +chrUn_JTFH01000468v1_decoy +chrUn_JTFH01000469v1_decoy +chrUn_JTFH01000470v1_decoy +chrUn_JTFH01000471v1_decoy +chrUn_JTFH01000472v1_decoy +chrUn_JTFH01000473v1_decoy +chrUn_JTFH01000474v1_decoy +chrUn_JTFH01000475v1_decoy +chrUn_JTFH01000476v1_decoy +chrUn_JTFH01000477v1_decoy +chrUn_JTFH01000478v1_decoy +chrUn_JTFH01000479v1_decoy +chrUn_JTFH01000480v1_decoy +chrUn_JTFH01000481v1_decoy +chrUn_JTFH01000482v1_decoy +chrUn_JTFH01000483v1_decoy +chrUn_JTFH01000484v1_decoy +chrUn_JTFH01000485v1_decoy +chrUn_JTFH01000486v1_decoy +chrUn_JTFH01000487v1_decoy +chrUn_JTFH01000488v1_decoy +chrUn_JTFH01000489v1_decoy 
+chrUn_JTFH01000490v1_decoy +chrUn_JTFH01000491v1_decoy +chrUn_JTFH01000492v1_decoy +chrUn_JTFH01000493v1_decoy +chrUn_JTFH01000494v1_decoy +chrUn_JTFH01000495v1_decoy +chrUn_JTFH01000496v1_decoy +chrUn_JTFH01000497v1_decoy +chrUn_JTFH01000498v1_decoy +chrUn_JTFH01000499v1_decoy +chrUn_JTFH01000500v1_decoy +chrUn_JTFH01000501v1_decoy +chrUn_JTFH01000502v1_decoy +chrUn_JTFH01000503v1_decoy +chrUn_JTFH01000504v1_decoy +chrUn_JTFH01000505v1_decoy +chrUn_JTFH01000506v1_decoy +chrUn_JTFH01000507v1_decoy +chrUn_JTFH01000508v1_decoy +chrUn_JTFH01000509v1_decoy +chrUn_JTFH01000510v1_decoy +chrUn_JTFH01000511v1_decoy +chrUn_JTFH01000512v1_decoy +chrUn_JTFH01000513v1_decoy +chrUn_JTFH01000514v1_decoy +chrUn_JTFH01000515v1_decoy +chrUn_JTFH01000516v1_decoy +chrUn_JTFH01000517v1_decoy +chrUn_JTFH01000518v1_decoy +chrUn_JTFH01000519v1_decoy +chrUn_JTFH01000520v1_decoy +chrUn_JTFH01000521v1_decoy +chrUn_JTFH01000522v1_decoy +chrUn_JTFH01000523v1_decoy +chrUn_JTFH01000524v1_decoy +chrUn_JTFH01000525v1_decoy +chrUn_JTFH01000526v1_decoy +chrUn_JTFH01000527v1_decoy +chrUn_JTFH01000528v1_decoy +chrUn_JTFH01000529v1_decoy +chrUn_JTFH01000530v1_decoy +chrUn_JTFH01000531v1_decoy +chrUn_JTFH01000532v1_decoy +chrUn_JTFH01000533v1_decoy +chrUn_JTFH01000534v1_decoy +chrUn_JTFH01000535v1_decoy +chrUn_JTFH01000536v1_decoy +chrUn_JTFH01000537v1_decoy +chrUn_JTFH01000538v1_decoy +chrUn_JTFH01000539v1_decoy +chrUn_JTFH01000540v1_decoy +chrUn_JTFH01000541v1_decoy +chrUn_JTFH01000542v1_decoy +chrUn_JTFH01000543v1_decoy +chrUn_JTFH01000544v1_decoy +chrUn_JTFH01000545v1_decoy +chrUn_JTFH01000546v1_decoy +chrUn_JTFH01000547v1_decoy +chrUn_JTFH01000548v1_decoy +chrUn_JTFH01000549v1_decoy +chrUn_JTFH01000550v1_decoy +chrUn_JTFH01000551v1_decoy +chrUn_JTFH01000552v1_decoy +chrUn_JTFH01000553v1_decoy +chrUn_JTFH01000554v1_decoy +chrUn_JTFH01000555v1_decoy +chrUn_JTFH01000556v1_decoy +chrUn_JTFH01000557v1_decoy +chrUn_JTFH01000558v1_decoy +chrUn_JTFH01000559v1_decoy +chrUn_JTFH01000560v1_decoy 
+chrUn_JTFH01000561v1_decoy +chrUn_JTFH01000562v1_decoy +chrUn_JTFH01000563v1_decoy +chrUn_JTFH01000564v1_decoy +chrUn_JTFH01000565v1_decoy +chrUn_JTFH01000566v1_decoy +chrUn_JTFH01000567v1_decoy +chrUn_JTFH01000568v1_decoy +chrUn_JTFH01000569v1_decoy +chrUn_JTFH01000570v1_decoy +chrUn_JTFH01000571v1_decoy +chrUn_JTFH01000572v1_decoy +chrUn_JTFH01000573v1_decoy +chrUn_JTFH01000574v1_decoy +chrUn_JTFH01000575v1_decoy +chrUn_JTFH01000576v1_decoy +chrUn_JTFH01000577v1_decoy +chrUn_JTFH01000578v1_decoy +chrUn_JTFH01000579v1_decoy +chrUn_JTFH01000580v1_decoy +chrUn_JTFH01000581v1_decoy +chrUn_JTFH01000582v1_decoy +chrUn_JTFH01000583v1_decoy +chrUn_JTFH01000584v1_decoy +chrUn_JTFH01000585v1_decoy +chrUn_JTFH01000586v1_decoy +chrUn_JTFH01000587v1_decoy +chrUn_JTFH01000588v1_decoy +chrUn_JTFH01000589v1_decoy +chrUn_JTFH01000590v1_decoy +chrUn_JTFH01000591v1_decoy +chrUn_JTFH01000592v1_decoy +chrUn_JTFH01000593v1_decoy +chrUn_JTFH01000594v1_decoy +chrUn_JTFH01000595v1_decoy +chrUn_JTFH01000596v1_decoy +chrUn_JTFH01000597v1_decoy +chrUn_JTFH01000598v1_decoy +chrUn_JTFH01000599v1_decoy +chrUn_JTFH01000600v1_decoy +chrUn_JTFH01000601v1_decoy +chrUn_JTFH01000602v1_decoy +chrUn_JTFH01000603v1_decoy +chrUn_JTFH01000604v1_decoy +chrUn_JTFH01000605v1_decoy +chrUn_JTFH01000606v1_decoy +chrUn_JTFH01000607v1_decoy +chrUn_JTFH01000608v1_decoy +chrUn_JTFH01000609v1_decoy +chrUn_JTFH01000610v1_decoy +chrUn_JTFH01000611v1_decoy +chrUn_JTFH01000612v1_decoy +chrUn_JTFH01000613v1_decoy +chrUn_JTFH01000614v1_decoy +chrUn_JTFH01000615v1_decoy +chrUn_JTFH01000616v1_decoy +chrUn_JTFH01000617v1_decoy +chrUn_JTFH01000618v1_decoy +chrUn_JTFH01000619v1_decoy +chrUn_JTFH01000620v1_decoy +chrUn_JTFH01000621v1_decoy +chrUn_JTFH01000622v1_decoy +chrUn_JTFH01000623v1_decoy +chrUn_JTFH01000624v1_decoy +chrUn_JTFH01000625v1_decoy +chrUn_JTFH01000626v1_decoy +chrUn_JTFH01000627v1_decoy +chrUn_JTFH01000628v1_decoy +chrUn_JTFH01000629v1_decoy +chrUn_JTFH01000630v1_decoy +chrUn_JTFH01000631v1_decoy 
+chrUn_JTFH01000632v1_decoy +chrUn_JTFH01000633v1_decoy +chrUn_JTFH01000634v1_decoy +chrUn_JTFH01000635v1_decoy +chrUn_JTFH01000636v1_decoy +chrUn_JTFH01000637v1_decoy +chrUn_JTFH01000638v1_decoy +chrUn_JTFH01000639v1_decoy +chrUn_JTFH01000640v1_decoy +chrUn_JTFH01000641v1_decoy +chrUn_JTFH01000642v1_decoy +chrUn_JTFH01000643v1_decoy +chrUn_JTFH01000644v1_decoy +chrUn_JTFH01000645v1_decoy +chrUn_JTFH01000646v1_decoy +chrUn_JTFH01000647v1_decoy +chrUn_JTFH01000648v1_decoy +chrUn_JTFH01000649v1_decoy +chrUn_JTFH01000650v1_decoy +chrUn_JTFH01000651v1_decoy +chrUn_JTFH01000652v1_decoy +chrUn_JTFH01000653v1_decoy +chrUn_JTFH01000654v1_decoy +chrUn_JTFH01000655v1_decoy +chrUn_JTFH01000656v1_decoy +chrUn_JTFH01000657v1_decoy +chrUn_JTFH01000658v1_decoy +chrUn_JTFH01000659v1_decoy +chrUn_JTFH01000660v1_decoy +chrUn_JTFH01000661v1_decoy +chrUn_JTFH01000662v1_decoy +chrUn_JTFH01000663v1_decoy +chrUn_JTFH01000664v1_decoy +chrUn_JTFH01000665v1_decoy +chrUn_JTFH01000666v1_decoy +chrUn_JTFH01000667v1_decoy +chrUn_JTFH01000668v1_decoy +chrUn_JTFH01000669v1_decoy +chrUn_JTFH01000670v1_decoy +chrUn_JTFH01000671v1_decoy +chrUn_JTFH01000672v1_decoy +chrUn_JTFH01000673v1_decoy +chrUn_JTFH01000674v1_decoy +chrUn_JTFH01000675v1_decoy +chrUn_JTFH01000676v1_decoy +chrUn_JTFH01000677v1_decoy +chrUn_JTFH01000678v1_decoy +chrUn_JTFH01000679v1_decoy +chrUn_JTFH01000680v1_decoy +chrUn_JTFH01000681v1_decoy +chrUn_JTFH01000682v1_decoy +chrUn_JTFH01000683v1_decoy +chrUn_JTFH01000684v1_decoy +chrUn_JTFH01000685v1_decoy +chrUn_JTFH01000686v1_decoy +chrUn_JTFH01000687v1_decoy +chrUn_JTFH01000688v1_decoy +chrUn_JTFH01000689v1_decoy +chrUn_JTFH01000690v1_decoy +chrUn_JTFH01000691v1_decoy +chrUn_JTFH01000692v1_decoy +chrUn_JTFH01000693v1_decoy +chrUn_JTFH01000694v1_decoy +chrUn_JTFH01000695v1_decoy +chrUn_JTFH01000696v1_decoy +chrUn_JTFH01000697v1_decoy +chrUn_JTFH01000698v1_decoy +chrUn_JTFH01000699v1_decoy +chrUn_JTFH01000700v1_decoy +chrUn_JTFH01000701v1_decoy +chrUn_JTFH01000702v1_decoy 
+chrUn_JTFH01000703v1_decoy +chrUn_JTFH01000704v1_decoy +chrUn_JTFH01000705v1_decoy +chrUn_JTFH01000706v1_decoy +chrUn_JTFH01000707v1_decoy +chrUn_JTFH01000708v1_decoy +chrUn_JTFH01000709v1_decoy +chrUn_JTFH01000710v1_decoy +chrUn_JTFH01000711v1_decoy +chrUn_JTFH01000712v1_decoy +chrUn_JTFH01000713v1_decoy +chrUn_JTFH01000714v1_decoy +chrUn_JTFH01000715v1_decoy +chrUn_JTFH01000716v1_decoy +chrUn_JTFH01000717v1_decoy +chrUn_JTFH01000718v1_decoy +chrUn_JTFH01000719v1_decoy +chrUn_JTFH01000720v1_decoy +chrUn_JTFH01000721v1_decoy +chrUn_JTFH01000722v1_decoy +chrUn_JTFH01000723v1_decoy +chrUn_JTFH01000724v1_decoy +chrUn_JTFH01000725v1_decoy +chrUn_JTFH01000726v1_decoy +chrUn_JTFH01000727v1_decoy +chrUn_JTFH01000728v1_decoy +chrUn_JTFH01000729v1_decoy +chrUn_JTFH01000730v1_decoy +chrUn_JTFH01000731v1_decoy +chrUn_JTFH01000732v1_decoy +chrUn_JTFH01000733v1_decoy +chrUn_JTFH01000734v1_decoy +chrUn_JTFH01000735v1_decoy +chrUn_JTFH01000736v1_decoy +chrUn_JTFH01000737v1_decoy +chrUn_JTFH01000738v1_decoy +chrUn_JTFH01000739v1_decoy +chrUn_JTFH01000740v1_decoy +chrUn_JTFH01000741v1_decoy +chrUn_JTFH01000742v1_decoy +chrUn_JTFH01000743v1_decoy +chrUn_JTFH01000744v1_decoy +chrUn_JTFH01000745v1_decoy +chrUn_JTFH01000746v1_decoy +chrUn_JTFH01000747v1_decoy +chrUn_JTFH01000748v1_decoy +chrUn_JTFH01000749v1_decoy +chrUn_JTFH01000750v1_decoy +chrUn_JTFH01000751v1_decoy +chrUn_JTFH01000752v1_decoy +chrUn_JTFH01000753v1_decoy +chrUn_JTFH01000754v1_decoy +chrUn_JTFH01000755v1_decoy +chrUn_JTFH01000756v1_decoy +chrUn_JTFH01000757v1_decoy +chrUn_JTFH01000758v1_decoy +chrUn_JTFH01000759v1_decoy +chrUn_JTFH01000760v1_decoy +chrUn_JTFH01000761v1_decoy +chrUn_JTFH01000762v1_decoy +chrUn_JTFH01000763v1_decoy +chrUn_JTFH01000764v1_decoy +chrUn_JTFH01000765v1_decoy +chrUn_JTFH01000766v1_decoy +chrUn_JTFH01000767v1_decoy +chrUn_JTFH01000768v1_decoy +chrUn_JTFH01000769v1_decoy +chrUn_JTFH01000770v1_decoy +chrUn_JTFH01000771v1_decoy +chrUn_JTFH01000772v1_decoy +chrUn_JTFH01000773v1_decoy 
+chrUn_JTFH01000774v1_decoy +chrUn_JTFH01000775v1_decoy +chrUn_JTFH01000776v1_decoy +chrUn_JTFH01000777v1_decoy +chrUn_JTFH01000778v1_decoy +chrUn_JTFH01000779v1_decoy +chrUn_JTFH01000780v1_decoy +chrUn_JTFH01000781v1_decoy +chrUn_JTFH01000782v1_decoy +chrUn_JTFH01000783v1_decoy +chrUn_JTFH01000784v1_decoy +chrUn_JTFH01000785v1_decoy +chrUn_JTFH01000786v1_decoy +chrUn_JTFH01000787v1_decoy +chrUn_JTFH01000788v1_decoy +chrUn_JTFH01000789v1_decoy +chrUn_JTFH01000790v1_decoy +chrUn_JTFH01000791v1_decoy +chrUn_JTFH01000792v1_decoy +chrUn_JTFH01000793v1_decoy +chrUn_JTFH01000794v1_decoy +chrUn_JTFH01000795v1_decoy +chrUn_JTFH01000796v1_decoy +chrUn_JTFH01000797v1_decoy +chrUn_JTFH01000798v1_decoy +chrUn_JTFH01000799v1_decoy +chrUn_JTFH01000800v1_decoy +chrUn_JTFH01000801v1_decoy +chrUn_JTFH01000802v1_decoy +chrUn_JTFH01000803v1_decoy +chrUn_JTFH01000804v1_decoy +chrUn_JTFH01000805v1_decoy +chrUn_JTFH01000806v1_decoy +chrUn_JTFH01000807v1_decoy +chrUn_JTFH01000808v1_decoy +chrUn_JTFH01000809v1_decoy +chrUn_JTFH01000810v1_decoy +chrUn_JTFH01000811v1_decoy +chrUn_JTFH01000812v1_decoy +chrUn_JTFH01000813v1_decoy +chrUn_JTFH01000814v1_decoy +chrUn_JTFH01000815v1_decoy +chrUn_JTFH01000816v1_decoy +chrUn_JTFH01000817v1_decoy +chrUn_JTFH01000818v1_decoy +chrUn_JTFH01000819v1_decoy +chrUn_JTFH01000820v1_decoy +chrUn_JTFH01000821v1_decoy +chrUn_JTFH01000822v1_decoy +chrUn_JTFH01000823v1_decoy +chrUn_JTFH01000824v1_decoy +chrUn_JTFH01000825v1_decoy +chrUn_JTFH01000826v1_decoy +chrUn_JTFH01000827v1_decoy +chrUn_JTFH01000828v1_decoy +chrUn_JTFH01000829v1_decoy +chrUn_JTFH01000830v1_decoy +chrUn_JTFH01000831v1_decoy +chrUn_JTFH01000832v1_decoy +chrUn_JTFH01000833v1_decoy +chrUn_JTFH01000834v1_decoy +chrUn_JTFH01000835v1_decoy +chrUn_JTFH01000836v1_decoy +chrUn_JTFH01000837v1_decoy +chrUn_JTFH01000838v1_decoy +chrUn_JTFH01000839v1_decoy +chrUn_JTFH01000840v1_decoy +chrUn_JTFH01000841v1_decoy +chrUn_JTFH01000842v1_decoy +chrUn_JTFH01000843v1_decoy +chrUn_JTFH01000844v1_decoy 
+chrUn_JTFH01000845v1_decoy +chrUn_JTFH01000846v1_decoy +chrUn_JTFH01000847v1_decoy +chrUn_JTFH01000848v1_decoy +chrUn_JTFH01000849v1_decoy +chrUn_JTFH01000850v1_decoy +chrUn_JTFH01000851v1_decoy +chrUn_JTFH01000852v1_decoy +chrUn_JTFH01000853v1_decoy +chrUn_JTFH01000854v1_decoy +chrUn_JTFH01000855v1_decoy +chrUn_JTFH01000856v1_decoy +chrUn_JTFH01000857v1_decoy +chrUn_JTFH01000858v1_decoy +chrUn_JTFH01000859v1_decoy +chrUn_JTFH01000860v1_decoy +chrUn_JTFH01000861v1_decoy +chrUn_JTFH01000862v1_decoy +chrUn_JTFH01000863v1_decoy +chrUn_JTFH01000864v1_decoy +chrUn_JTFH01000865v1_decoy +chrUn_JTFH01000866v1_decoy +chrUn_JTFH01000867v1_decoy +chrUn_JTFH01000868v1_decoy +chrUn_JTFH01000869v1_decoy +chrUn_JTFH01000870v1_decoy +chrUn_JTFH01000871v1_decoy +chrUn_JTFH01000872v1_decoy +chrUn_JTFH01000873v1_decoy +chrUn_JTFH01000874v1_decoy +chrUn_JTFH01000875v1_decoy +chrUn_JTFH01000876v1_decoy +chrUn_JTFH01000877v1_decoy +chrUn_JTFH01000878v1_decoy +chrUn_JTFH01000879v1_decoy +chrUn_JTFH01000880v1_decoy +chrUn_JTFH01000881v1_decoy +chrUn_JTFH01000882v1_decoy +chrUn_JTFH01000883v1_decoy +chrUn_JTFH01000884v1_decoy +chrUn_JTFH01000885v1_decoy +chrUn_JTFH01000886v1_decoy +chrUn_JTFH01000887v1_decoy +chrUn_JTFH01000888v1_decoy +chrUn_JTFH01000889v1_decoy +chrUn_JTFH01000890v1_decoy +chrUn_JTFH01000891v1_decoy +chrUn_JTFH01000892v1_decoy +chrUn_JTFH01000893v1_decoy +chrUn_JTFH01000894v1_decoy +chrUn_JTFH01000895v1_decoy +chrUn_JTFH01000896v1_decoy +chrUn_JTFH01000897v1_decoy +chrUn_JTFH01000898v1_decoy +chrUn_JTFH01000899v1_decoy +chrUn_JTFH01000900v1_decoy +chrUn_JTFH01000901v1_decoy +chrUn_JTFH01000902v1_decoy +chrUn_JTFH01000903v1_decoy +chrUn_JTFH01000904v1_decoy +chrUn_JTFH01000905v1_decoy +chrUn_JTFH01000906v1_decoy +chrUn_JTFH01000907v1_decoy +chrUn_JTFH01000908v1_decoy +chrUn_JTFH01000909v1_decoy +chrUn_JTFH01000910v1_decoy +chrUn_JTFH01000911v1_decoy +chrUn_JTFH01000912v1_decoy +chrUn_JTFH01000913v1_decoy +chrUn_JTFH01000914v1_decoy +chrUn_JTFH01000915v1_decoy 
+chrUn_JTFH01000916v1_decoy +chrUn_JTFH01000917v1_decoy +chrUn_JTFH01000918v1_decoy +chrUn_JTFH01000919v1_decoy +chrUn_JTFH01000920v1_decoy +chrUn_JTFH01000921v1_decoy +chrUn_JTFH01000922v1_decoy +chrUn_JTFH01000923v1_decoy +chrUn_JTFH01000924v1_decoy +chrUn_JTFH01000925v1_decoy +chrUn_JTFH01000926v1_decoy +chrUn_JTFH01000927v1_decoy +chrUn_JTFH01000928v1_decoy +chrUn_JTFH01000929v1_decoy +chrUn_JTFH01000930v1_decoy +chrUn_JTFH01000931v1_decoy +chrUn_JTFH01000932v1_decoy +chrUn_JTFH01000933v1_decoy +chrUn_JTFH01000934v1_decoy +chrUn_JTFH01000935v1_decoy +chrUn_JTFH01000936v1_decoy +chrUn_JTFH01000937v1_decoy +chrUn_JTFH01000938v1_decoy +chrUn_JTFH01000939v1_decoy +chrUn_JTFH01000940v1_decoy +chrUn_JTFH01000941v1_decoy +chrUn_JTFH01000942v1_decoy +chrUn_JTFH01000943v1_decoy +chrUn_JTFH01000944v1_decoy +chrUn_JTFH01000945v1_decoy +chrUn_JTFH01000946v1_decoy +chrUn_JTFH01000947v1_decoy +chrUn_JTFH01000948v1_decoy +chrUn_JTFH01000949v1_decoy +chrUn_JTFH01000950v1_decoy +chrUn_JTFH01000951v1_decoy +chrUn_JTFH01000952v1_decoy +chrUn_JTFH01000953v1_decoy +chrUn_JTFH01000954v1_decoy +chrUn_JTFH01000955v1_decoy +chrUn_JTFH01000956v1_decoy +chrUn_JTFH01000957v1_decoy +chrUn_JTFH01000958v1_decoy +chrUn_JTFH01000959v1_decoy +chrUn_JTFH01000960v1_decoy +chrUn_JTFH01000961v1_decoy +chrUn_JTFH01000962v1_decoy +chrUn_JTFH01000963v1_decoy +chrUn_JTFH01000964v1_decoy +chrUn_JTFH01000965v1_decoy +chrUn_JTFH01000966v1_decoy +chrUn_JTFH01000967v1_decoy +chrUn_JTFH01000968v1_decoy +chrUn_JTFH01000969v1_decoy +chrUn_JTFH01000970v1_decoy +chrUn_JTFH01000971v1_decoy +chrUn_JTFH01000972v1_decoy +chrUn_JTFH01000973v1_decoy +chrUn_JTFH01000974v1_decoy +chrUn_JTFH01000975v1_decoy +chrUn_JTFH01000976v1_decoy +chrUn_JTFH01000977v1_decoy +chrUn_JTFH01000978v1_decoy +chrUn_JTFH01000979v1_decoy +chrUn_JTFH01000980v1_decoy +chrUn_JTFH01000981v1_decoy +chrUn_JTFH01000982v1_decoy +chrUn_JTFH01000983v1_decoy +chrUn_JTFH01000984v1_decoy +chrUn_JTFH01000985v1_decoy +chrUn_JTFH01000986v1_decoy 
+chrUn_JTFH01000987v1_decoy +chrUn_JTFH01000988v1_decoy +chrUn_JTFH01000989v1_decoy +chrUn_JTFH01000990v1_decoy +chrUn_JTFH01000991v1_decoy +chrUn_JTFH01000992v1_decoy +chrUn_JTFH01000993v1_decoy +chrUn_JTFH01000994v1_decoy +chrUn_JTFH01000995v1_decoy +chrUn_JTFH01000996v1_decoy +chrUn_JTFH01000997v1_decoy +chrUn_JTFH01000998v1_decoy +chrUn_JTFH01000999v1_decoy +chrUn_JTFH01001000v1_decoy +chrUn_JTFH01001001v1_decoy +chrUn_JTFH01001002v1_decoy +chrUn_JTFH01001003v1_decoy +chrUn_JTFH01001004v1_decoy +chrUn_JTFH01001005v1_decoy +chrUn_JTFH01001006v1_decoy +chrUn_JTFH01001007v1_decoy +chrUn_JTFH01001008v1_decoy +chrUn_JTFH01001009v1_decoy +chrUn_JTFH01001010v1_decoy +chrUn_JTFH01001011v1_decoy +chrUn_JTFH01001012v1_decoy +chrUn_JTFH01001013v1_decoy +chrUn_JTFH01001014v1_decoy +chrUn_JTFH01001015v1_decoy +chrUn_JTFH01001016v1_decoy +chrUn_JTFH01001017v1_decoy +chrUn_JTFH01001018v1_decoy +chrUn_JTFH01001019v1_decoy +chrUn_JTFH01001020v1_decoy +chrUn_JTFH01001021v1_decoy +chrUn_JTFH01001022v1_decoy +chrUn_JTFH01001023v1_decoy +chrUn_JTFH01001024v1_decoy +chrUn_JTFH01001025v1_decoy +chrUn_JTFH01001026v1_decoy +chrUn_JTFH01001027v1_decoy +chrUn_JTFH01001028v1_decoy +chrUn_JTFH01001029v1_decoy +chrUn_JTFH01001030v1_decoy +chrUn_JTFH01001031v1_decoy +chrUn_JTFH01001032v1_decoy +chrUn_JTFH01001033v1_decoy +chrUn_JTFH01001034v1_decoy +chrUn_JTFH01001035v1_decoy +chrUn_JTFH01001036v1_decoy +chrUn_JTFH01001037v1_decoy +chrUn_JTFH01001038v1_decoy +chrUn_JTFH01001039v1_decoy +chrUn_JTFH01001040v1_decoy +chrUn_JTFH01001041v1_decoy +chrUn_JTFH01001042v1_decoy +chrUn_JTFH01001043v1_decoy +chrUn_JTFH01001044v1_decoy +chrUn_JTFH01001045v1_decoy +chrUn_JTFH01001046v1_decoy +chrUn_JTFH01001047v1_decoy +chrUn_JTFH01001048v1_decoy +chrUn_JTFH01001049v1_decoy +chrUn_JTFH01001050v1_decoy +chrUn_JTFH01001051v1_decoy +chrUn_JTFH01001052v1_decoy +chrUn_JTFH01001053v1_decoy +chrUn_JTFH01001054v1_decoy +chrUn_JTFH01001055v1_decoy +chrUn_JTFH01001056v1_decoy +chrUn_JTFH01001057v1_decoy 
+chrUn_JTFH01001058v1_decoy +chrUn_JTFH01001059v1_decoy +chrUn_JTFH01001060v1_decoy +chrUn_JTFH01001061v1_decoy +chrUn_JTFH01001062v1_decoy +chrUn_JTFH01001063v1_decoy +chrUn_JTFH01001064v1_decoy +chrUn_JTFH01001065v1_decoy +chrUn_JTFH01001066v1_decoy +chrUn_JTFH01001067v1_decoy +chrUn_JTFH01001068v1_decoy +chrUn_JTFH01001069v1_decoy +chrUn_JTFH01001070v1_decoy +chrUn_JTFH01001071v1_decoy +chrUn_JTFH01001072v1_decoy +chrUn_JTFH01001073v1_decoy +chrUn_JTFH01001074v1_decoy +chrUn_JTFH01001075v1_decoy +chrUn_JTFH01001076v1_decoy +chrUn_JTFH01001077v1_decoy +chrUn_JTFH01001078v1_decoy +chrUn_JTFH01001079v1_decoy +chrUn_JTFH01001080v1_decoy +chrUn_JTFH01001081v1_decoy +chrUn_JTFH01001082v1_decoy +chrUn_JTFH01001083v1_decoy +chrUn_JTFH01001084v1_decoy +chrUn_JTFH01001085v1_decoy +chrUn_JTFH01001086v1_decoy +chrUn_JTFH01001087v1_decoy +chrUn_JTFH01001088v1_decoy +chrUn_JTFH01001089v1_decoy +chrUn_JTFH01001090v1_decoy +chrUn_JTFH01001091v1_decoy +chrUn_JTFH01001092v1_decoy +chrUn_JTFH01001093v1_decoy +chrUn_JTFH01001094v1_decoy +chrUn_JTFH01001095v1_decoy +chrUn_JTFH01001096v1_decoy +chrUn_JTFH01001097v1_decoy +chrUn_JTFH01001098v1_decoy +chrUn_JTFH01001099v1_decoy +chrUn_JTFH01001100v1_decoy +chrUn_JTFH01001101v1_decoy +chrUn_JTFH01001102v1_decoy +chrUn_JTFH01001103v1_decoy +chrUn_JTFH01001104v1_decoy +chrUn_JTFH01001105v1_decoy +chrUn_JTFH01001106v1_decoy +chrUn_JTFH01001107v1_decoy +chrUn_JTFH01001108v1_decoy +chrUn_JTFH01001109v1_decoy +chrUn_JTFH01001110v1_decoy +chrUn_JTFH01001111v1_decoy +chrUn_JTFH01001112v1_decoy +chrUn_JTFH01001113v1_decoy +chrUn_JTFH01001114v1_decoy +chrUn_JTFH01001115v1_decoy +chrUn_JTFH01001116v1_decoy +chrUn_JTFH01001117v1_decoy +chrUn_JTFH01001118v1_decoy +chrUn_JTFH01001119v1_decoy +chrUn_JTFH01001120v1_decoy +chrUn_JTFH01001121v1_decoy +chrUn_JTFH01001122v1_decoy +chrUn_JTFH01001123v1_decoy +chrUn_JTFH01001124v1_decoy +chrUn_JTFH01001125v1_decoy +chrUn_JTFH01001126v1_decoy +chrUn_JTFH01001127v1_decoy +chrUn_JTFH01001128v1_decoy 
+chrUn_JTFH01001129v1_decoy +chrUn_JTFH01001130v1_decoy +chrUn_JTFH01001131v1_decoy +chrUn_JTFH01001132v1_decoy +chrUn_JTFH01001133v1_decoy +chrUn_JTFH01001134v1_decoy +chrUn_JTFH01001135v1_decoy +chrUn_JTFH01001136v1_decoy +chrUn_JTFH01001137v1_decoy +chrUn_JTFH01001138v1_decoy +chrUn_JTFH01001139v1_decoy +chrUn_JTFH01001140v1_decoy +chrUn_JTFH01001141v1_decoy +chrUn_JTFH01001142v1_decoy +chrUn_JTFH01001143v1_decoy +chrUn_JTFH01001144v1_decoy +chrUn_JTFH01001145v1_decoy +chrUn_JTFH01001146v1_decoy +chrUn_JTFH01001147v1_decoy +chrUn_JTFH01001148v1_decoy +chrUn_JTFH01001149v1_decoy +chrUn_JTFH01001150v1_decoy +chrUn_JTFH01001151v1_decoy +chrUn_JTFH01001152v1_decoy +chrUn_JTFH01001153v1_decoy +chrUn_JTFH01001154v1_decoy +chrUn_JTFH01001155v1_decoy +chrUn_JTFH01001156v1_decoy +chrUn_JTFH01001157v1_decoy +chrUn_JTFH01001158v1_decoy +chrUn_JTFH01001159v1_decoy +chrUn_JTFH01001160v1_decoy +chrUn_JTFH01001161v1_decoy +chrUn_JTFH01001162v1_decoy +chrUn_JTFH01001163v1_decoy +chrUn_JTFH01001164v1_decoy +chrUn_JTFH01001165v1_decoy +chrUn_JTFH01001166v1_decoy +chrUn_JTFH01001167v1_decoy +chrUn_JTFH01001168v1_decoy +chrUn_JTFH01001169v1_decoy +chrUn_JTFH01001170v1_decoy +chrUn_JTFH01001171v1_decoy +chrUn_JTFH01001172v1_decoy +chrUn_JTFH01001173v1_decoy +chrUn_JTFH01001174v1_decoy +chrUn_JTFH01001175v1_decoy +chrUn_JTFH01001176v1_decoy +chrUn_JTFH01001177v1_decoy +chrUn_JTFH01001178v1_decoy +chrUn_JTFH01001179v1_decoy +chrUn_JTFH01001180v1_decoy +chrUn_JTFH01001181v1_decoy +chrUn_JTFH01001182v1_decoy +chrUn_JTFH01001183v1_decoy +chrUn_JTFH01001184v1_decoy +chrUn_JTFH01001185v1_decoy +chrUn_JTFH01001186v1_decoy +chrUn_JTFH01001187v1_decoy +chrUn_JTFH01001188v1_decoy +chrUn_JTFH01001189v1_decoy +chrUn_JTFH01001190v1_decoy +chrUn_JTFH01001191v1_decoy +chrUn_JTFH01001192v1_decoy +chrUn_JTFH01001193v1_decoy +chrUn_JTFH01001194v1_decoy +chrUn_JTFH01001195v1_decoy +chrUn_JTFH01001196v1_decoy +chrUn_JTFH01001197v1_decoy +chrUn_JTFH01001198v1_decoy +chrUn_JTFH01001199v1_decoy 
+chrUn_JTFH01001200v1_decoy +chrUn_JTFH01001201v1_decoy +chrUn_JTFH01001202v1_decoy +chrUn_JTFH01001203v1_decoy +chrUn_JTFH01001204v1_decoy +chrUn_JTFH01001205v1_decoy +chrUn_JTFH01001206v1_decoy +chrUn_JTFH01001207v1_decoy +chrUn_JTFH01001208v1_decoy +chrUn_JTFH01001209v1_decoy +chrUn_JTFH01001210v1_decoy +chrUn_JTFH01001211v1_decoy +chrUn_JTFH01001212v1_decoy +chrUn_JTFH01001213v1_decoy +chrUn_JTFH01001214v1_decoy +chrUn_JTFH01001215v1_decoy +chrUn_JTFH01001216v1_decoy +chrUn_JTFH01001217v1_decoy +chrUn_JTFH01001218v1_decoy +chrUn_JTFH01001219v1_decoy +chrUn_JTFH01001220v1_decoy +chrUn_JTFH01001221v1_decoy +chrUn_JTFH01001222v1_decoy +chrUn_JTFH01001223v1_decoy +chrUn_JTFH01001224v1_decoy +chrUn_JTFH01001225v1_decoy +chrUn_JTFH01001226v1_decoy +chrUn_JTFH01001227v1_decoy +chrUn_JTFH01001228v1_decoy +chrUn_JTFH01001229v1_decoy +chrUn_JTFH01001230v1_decoy +chrUn_JTFH01001231v1_decoy +chrUn_JTFH01001232v1_decoy +chrUn_JTFH01001233v1_decoy +chrUn_JTFH01001234v1_decoy +chrUn_JTFH01001235v1_decoy +chrUn_JTFH01001236v1_decoy +chrUn_JTFH01001237v1_decoy +chrUn_JTFH01001238v1_decoy +chrUn_JTFH01001239v1_decoy +chrUn_JTFH01001240v1_decoy +chrUn_JTFH01001241v1_decoy +chrUn_JTFH01001242v1_decoy +chrUn_JTFH01001243v1_decoy +chrUn_JTFH01001244v1_decoy +chrUn_JTFH01001245v1_decoy +chrUn_JTFH01001246v1_decoy +chrUn_JTFH01001247v1_decoy +chrUn_JTFH01001248v1_decoy +chrUn_JTFH01001249v1_decoy +chrUn_JTFH01001250v1_decoy +chrUn_JTFH01001251v1_decoy +chrUn_JTFH01001252v1_decoy +chrUn_JTFH01001253v1_decoy +chrUn_JTFH01001254v1_decoy +chrUn_JTFH01001255v1_decoy +chrUn_JTFH01001256v1_decoy +chrUn_JTFH01001257v1_decoy +chrUn_JTFH01001258v1_decoy +chrUn_JTFH01001259v1_decoy +chrUn_JTFH01001260v1_decoy +chrUn_JTFH01001261v1_decoy +chrUn_JTFH01001262v1_decoy +chrUn_JTFH01001263v1_decoy +chrUn_JTFH01001264v1_decoy +chrUn_JTFH01001265v1_decoy +chrUn_JTFH01001266v1_decoy +chrUn_JTFH01001267v1_decoy +chrUn_JTFH01001268v1_decoy +chrUn_JTFH01001269v1_decoy +chrUn_JTFH01001270v1_decoy 
+chrUn_JTFH01001271v1_decoy +chrUn_JTFH01001272v1_decoy +chrUn_JTFH01001273v1_decoy +chrUn_JTFH01001274v1_decoy +chrUn_JTFH01001275v1_decoy +chrUn_JTFH01001276v1_decoy +chrUn_JTFH01001277v1_decoy +chrUn_JTFH01001278v1_decoy +chrUn_JTFH01001279v1_decoy +chrUn_JTFH01001280v1_decoy +chrUn_JTFH01001281v1_decoy +chrUn_JTFH01001282v1_decoy +chrUn_JTFH01001283v1_decoy +chrUn_JTFH01001284v1_decoy +chrUn_JTFH01001285v1_decoy +chrUn_JTFH01001286v1_decoy +chrUn_JTFH01001287v1_decoy +chrUn_JTFH01001288v1_decoy +chrUn_JTFH01001289v1_decoy +chrUn_JTFH01001290v1_decoy +chrUn_JTFH01001291v1_decoy +chrUn_JTFH01001292v1_decoy +chrUn_JTFH01001293v1_decoy +chrUn_JTFH01001294v1_decoy +chrUn_JTFH01001295v1_decoy +chrUn_JTFH01001296v1_decoy +chrUn_JTFH01001297v1_decoy +chrUn_JTFH01001298v1_decoy +chrUn_JTFH01001299v1_decoy +chrUn_JTFH01001300v1_decoy +chrUn_JTFH01001301v1_decoy +chrUn_JTFH01001302v1_decoy +chrUn_JTFH01001303v1_decoy +chrUn_JTFH01001304v1_decoy +chrUn_JTFH01001305v1_decoy +chrUn_JTFH01001306v1_decoy +chrUn_JTFH01001307v1_decoy +chrUn_JTFH01001308v1_decoy +chrUn_JTFH01001309v1_decoy +chrUn_JTFH01001310v1_decoy +chrUn_JTFH01001311v1_decoy +chrUn_JTFH01001312v1_decoy +chrUn_JTFH01001313v1_decoy +chrUn_JTFH01001314v1_decoy +chrUn_JTFH01001315v1_decoy +chrUn_JTFH01001316v1_decoy +chrUn_JTFH01001317v1_decoy +chrUn_JTFH01001318v1_decoy +chrUn_JTFH01001319v1_decoy +chrUn_JTFH01001320v1_decoy +chrUn_JTFH01001321v1_decoy +chrUn_JTFH01001322v1_decoy +chrUn_JTFH01001323v1_decoy +chrUn_JTFH01001324v1_decoy +chrUn_JTFH01001325v1_decoy +chrUn_JTFH01001326v1_decoy +chrUn_JTFH01001327v1_decoy +chrUn_JTFH01001328v1_decoy +chrUn_JTFH01001329v1_decoy +chrUn_JTFH01001330v1_decoy +chrUn_JTFH01001331v1_decoy +chrUn_JTFH01001332v1_decoy +chrUn_JTFH01001333v1_decoy +chrUn_JTFH01001334v1_decoy +chrUn_JTFH01001335v1_decoy +chrUn_JTFH01001336v1_decoy +chrUn_JTFH01001337v1_decoy +chrUn_JTFH01001338v1_decoy +chrUn_JTFH01001339v1_decoy +chrUn_JTFH01001340v1_decoy +chrUn_JTFH01001341v1_decoy 
+chrUn_JTFH01001342v1_decoy +chrUn_JTFH01001343v1_decoy +chrUn_JTFH01001344v1_decoy +chrUn_JTFH01001345v1_decoy +chrUn_JTFH01001346v1_decoy +chrUn_JTFH01001347v1_decoy +chrUn_JTFH01001348v1_decoy +chrUn_JTFH01001349v1_decoy +chrUn_JTFH01001350v1_decoy +chrUn_JTFH01001351v1_decoy +chrUn_JTFH01001352v1_decoy +chrUn_JTFH01001353v1_decoy +chrUn_JTFH01001354v1_decoy +chrUn_JTFH01001355v1_decoy +chrUn_JTFH01001356v1_decoy +chrUn_JTFH01001357v1_decoy +chrUn_JTFH01001358v1_decoy +chrUn_JTFH01001359v1_decoy +chrUn_JTFH01001360v1_decoy +chrUn_JTFH01001361v1_decoy +chrUn_JTFH01001362v1_decoy +chrUn_JTFH01001363v1_decoy +chrUn_JTFH01001364v1_decoy +chrUn_JTFH01001365v1_decoy +chrUn_JTFH01001366v1_decoy +chrUn_JTFH01001367v1_decoy +chrUn_JTFH01001368v1_decoy +chrUn_JTFH01001369v1_decoy +chrUn_JTFH01001370v1_decoy +chrUn_JTFH01001371v1_decoy +chrUn_JTFH01001372v1_decoy +chrUn_JTFH01001373v1_decoy +chrUn_JTFH01001374v1_decoy +chrUn_JTFH01001375v1_decoy +chrUn_JTFH01001376v1_decoy +chrUn_JTFH01001377v1_decoy +chrUn_JTFH01001378v1_decoy +chrUn_JTFH01001379v1_decoy +chrUn_JTFH01001380v1_decoy +chrUn_JTFH01001381v1_decoy +chrUn_JTFH01001382v1_decoy +chrUn_JTFH01001383v1_decoy +chrUn_JTFH01001384v1_decoy +chrUn_JTFH01001385v1_decoy +chrUn_JTFH01001386v1_decoy +chrUn_JTFH01001387v1_decoy +chrUn_JTFH01001388v1_decoy +chrUn_JTFH01001389v1_decoy +chrUn_JTFH01001390v1_decoy +chrUn_JTFH01001391v1_decoy +chrUn_JTFH01001392v1_decoy +chrUn_JTFH01001393v1_decoy +chrUn_JTFH01001394v1_decoy +chrUn_JTFH01001395v1_decoy +chrUn_JTFH01001396v1_decoy +chrUn_JTFH01001397v1_decoy +chrUn_JTFH01001398v1_decoy +chrUn_JTFH01001399v1_decoy +chrUn_JTFH01001400v1_decoy +chrUn_JTFH01001401v1_decoy +chrUn_JTFH01001402v1_decoy +chrUn_JTFH01001403v1_decoy +chrUn_JTFH01001404v1_decoy +chrUn_JTFH01001405v1_decoy +chrUn_JTFH01001406v1_decoy +chrUn_JTFH01001407v1_decoy +chrUn_JTFH01001408v1_decoy +chrUn_JTFH01001409v1_decoy +chrUn_JTFH01001410v1_decoy +chrUn_JTFH01001411v1_decoy +chrUn_JTFH01001412v1_decoy 
+chrUn_JTFH01001413v1_decoy +chrUn_JTFH01001414v1_decoy +chrUn_JTFH01001415v1_decoy +chrUn_JTFH01001416v1_decoy +chrUn_JTFH01001417v1_decoy +chrUn_JTFH01001418v1_decoy +chrUn_JTFH01001419v1_decoy +chrUn_JTFH01001420v1_decoy +chrUn_JTFH01001421v1_decoy +chrUn_JTFH01001422v1_decoy +chrUn_JTFH01001423v1_decoy +chrUn_JTFH01001424v1_decoy +chrUn_JTFH01001425v1_decoy +chrUn_JTFH01001426v1_decoy +chrUn_JTFH01001427v1_decoy +chrUn_JTFH01001428v1_decoy +chrUn_JTFH01001429v1_decoy +chrUn_JTFH01001430v1_decoy +chrUn_JTFH01001431v1_decoy +chrUn_JTFH01001432v1_decoy +chrUn_JTFH01001433v1_decoy +chrUn_JTFH01001434v1_decoy +chrUn_JTFH01001435v1_decoy +chrUn_JTFH01001436v1_decoy +chrUn_JTFH01001437v1_decoy +chrUn_JTFH01001438v1_decoy +chrUn_JTFH01001439v1_decoy +chrUn_JTFH01001440v1_decoy +chrUn_JTFH01001441v1_decoy +chrUn_JTFH01001442v1_decoy +chrUn_JTFH01001443v1_decoy +chrUn_JTFH01001444v1_decoy +chrUn_JTFH01001445v1_decoy +chrUn_JTFH01001446v1_decoy +chrUn_JTFH01001447v1_decoy +chrUn_JTFH01001448v1_decoy +chrUn_JTFH01001449v1_decoy +chrUn_JTFH01001450v1_decoy +chrUn_JTFH01001451v1_decoy +chrUn_JTFH01001452v1_decoy +chrUn_JTFH01001453v1_decoy +chrUn_JTFH01001454v1_decoy +chrUn_JTFH01001455v1_decoy +chrUn_JTFH01001456v1_decoy +chrUn_JTFH01001457v1_decoy +chrUn_JTFH01001458v1_decoy +chrUn_JTFH01001459v1_decoy +chrUn_JTFH01001460v1_decoy +chrUn_JTFH01001461v1_decoy +chrUn_JTFH01001462v1_decoy +chrUn_JTFH01001463v1_decoy +chrUn_JTFH01001464v1_decoy +chrUn_JTFH01001465v1_decoy +chrUn_JTFH01001466v1_decoy +chrUn_JTFH01001467v1_decoy +chrUn_JTFH01001468v1_decoy +chrUn_JTFH01001469v1_decoy +chrUn_JTFH01001470v1_decoy +chrUn_JTFH01001471v1_decoy +chrUn_JTFH01001472v1_decoy +chrUn_JTFH01001473v1_decoy +chrUn_JTFH01001474v1_decoy +chrUn_JTFH01001475v1_decoy +chrUn_JTFH01001476v1_decoy +chrUn_JTFH01001477v1_decoy +chrUn_JTFH01001478v1_decoy +chrUn_JTFH01001479v1_decoy +chrUn_JTFH01001480v1_decoy +chrUn_JTFH01001481v1_decoy +chrUn_JTFH01001482v1_decoy +chrUn_JTFH01001483v1_decoy 
+chrUn_JTFH01001484v1_decoy +chrUn_JTFH01001485v1_decoy +chrUn_JTFH01001486v1_decoy +chrUn_JTFH01001487v1_decoy +chrUn_JTFH01001488v1_decoy +chrUn_JTFH01001489v1_decoy +chrUn_JTFH01001490v1_decoy +chrUn_JTFH01001491v1_decoy +chrUn_JTFH01001492v1_decoy +chrUn_JTFH01001493v1_decoy +chrUn_JTFH01001494v1_decoy +chrUn_JTFH01001495v1_decoy +chrUn_JTFH01001496v1_decoy +chrUn_JTFH01001497v1_decoy +chrUn_JTFH01001498v1_decoy +chrUn_JTFH01001499v1_decoy +chrUn_JTFH01001500v1_decoy +chrUn_JTFH01001501v1_decoy +chrUn_JTFH01001502v1_decoy +chrUn_JTFH01001503v1_decoy +chrUn_JTFH01001504v1_decoy +chrUn_JTFH01001505v1_decoy +chrUn_JTFH01001506v1_decoy +chrUn_JTFH01001507v1_decoy +chrUn_JTFH01001508v1_decoy +chrUn_JTFH01001509v1_decoy +chrUn_JTFH01001510v1_decoy +chrUn_JTFH01001511v1_decoy +chrUn_JTFH01001512v1_decoy +chrUn_JTFH01001513v1_decoy +chrUn_JTFH01001514v1_decoy +chrUn_JTFH01001515v1_decoy +chrUn_JTFH01001516v1_decoy +chrUn_JTFH01001517v1_decoy +chrUn_JTFH01001518v1_decoy +chrUn_JTFH01001519v1_decoy +chrUn_JTFH01001520v1_decoy +chrUn_JTFH01001521v1_decoy +chrUn_JTFH01001522v1_decoy +chrUn_JTFH01001523v1_decoy +chrUn_JTFH01001524v1_decoy +chrUn_JTFH01001525v1_decoy +chrUn_JTFH01001526v1_decoy +chrUn_JTFH01001527v1_decoy +chrUn_JTFH01001528v1_decoy +chrUn_JTFH01001529v1_decoy +chrUn_JTFH01001530v1_decoy +chrUn_JTFH01001531v1_decoy +chrUn_JTFH01001532v1_decoy +chrUn_JTFH01001533v1_decoy +chrUn_JTFH01001534v1_decoy +chrUn_JTFH01001535v1_decoy +chrUn_JTFH01001536v1_decoy +chrUn_JTFH01001537v1_decoy +chrUn_JTFH01001538v1_decoy +chrUn_JTFH01001539v1_decoy +chrUn_JTFH01001540v1_decoy +chrUn_JTFH01001541v1_decoy +chrUn_JTFH01001542v1_decoy +chrUn_JTFH01001543v1_decoy +chrUn_JTFH01001544v1_decoy +chrUn_JTFH01001545v1_decoy +chrUn_JTFH01001546v1_decoy +chrUn_JTFH01001547v1_decoy +chrUn_JTFH01001548v1_decoy +chrUn_JTFH01001549v1_decoy +chrUn_JTFH01001550v1_decoy +chrUn_JTFH01001551v1_decoy +chrUn_JTFH01001552v1_decoy +chrUn_JTFH01001553v1_decoy +chrUn_JTFH01001554v1_decoy 
+chrUn_JTFH01001555v1_decoy +chrUn_JTFH01001556v1_decoy +chrUn_JTFH01001557v1_decoy +chrUn_JTFH01001558v1_decoy +chrUn_JTFH01001559v1_decoy +chrUn_JTFH01001560v1_decoy +chrUn_JTFH01001561v1_decoy +chrUn_JTFH01001562v1_decoy +chrUn_JTFH01001563v1_decoy +chrUn_JTFH01001564v1_decoy +chrUn_JTFH01001565v1_decoy +chrUn_JTFH01001566v1_decoy +chrUn_JTFH01001567v1_decoy +chrUn_JTFH01001568v1_decoy +chrUn_JTFH01001569v1_decoy +chrUn_JTFH01001570v1_decoy +chrUn_JTFH01001571v1_decoy +chrUn_JTFH01001572v1_decoy +chrUn_JTFH01001573v1_decoy +chrUn_JTFH01001574v1_decoy +chrUn_JTFH01001575v1_decoy +chrUn_JTFH01001576v1_decoy +chrUn_JTFH01001577v1_decoy +chrUn_JTFH01001578v1_decoy +chrUn_JTFH01001579v1_decoy +chrUn_JTFH01001580v1_decoy +chrUn_JTFH01001581v1_decoy +chrUn_JTFH01001582v1_decoy +chrUn_JTFH01001583v1_decoy +chrUn_JTFH01001584v1_decoy +chrUn_JTFH01001585v1_decoy +chrUn_JTFH01001586v1_decoy +chrUn_JTFH01001587v1_decoy +chrUn_JTFH01001588v1_decoy +chrUn_JTFH01001589v1_decoy +chrUn_JTFH01001590v1_decoy +chrUn_JTFH01001591v1_decoy +chrUn_JTFH01001592v1_decoy +chrUn_JTFH01001593v1_decoy +chrUn_JTFH01001594v1_decoy +chrUn_JTFH01001595v1_decoy +chrUn_JTFH01001596v1_decoy +chrUn_JTFH01001597v1_decoy +chrUn_JTFH01001598v1_decoy +chrUn_JTFH01001599v1_decoy +chrUn_JTFH01001600v1_decoy +chrUn_JTFH01001601v1_decoy +chrUn_JTFH01001602v1_decoy +chrUn_JTFH01001603v1_decoy +chrUn_JTFH01001604v1_decoy +chrUn_JTFH01001605v1_decoy +chrUn_JTFH01001606v1_decoy +chrUn_JTFH01001607v1_decoy +chrUn_JTFH01001608v1_decoy +chrUn_JTFH01001609v1_decoy +chrUn_JTFH01001610v1_decoy +chrUn_JTFH01001611v1_decoy +chrUn_JTFH01001612v1_decoy +chrUn_JTFH01001613v1_decoy +chrUn_JTFH01001614v1_decoy +chrUn_JTFH01001615v1_decoy +chrUn_JTFH01001616v1_decoy +chrUn_JTFH01001617v1_decoy +chrUn_JTFH01001618v1_decoy +chrUn_JTFH01001619v1_decoy +chrUn_JTFH01001620v1_decoy +chrUn_JTFH01001621v1_decoy +chrUn_JTFH01001622v1_decoy +chrUn_JTFH01001623v1_decoy +chrUn_JTFH01001624v1_decoy +chrUn_JTFH01001625v1_decoy 
+chrUn_JTFH01001626v1_decoy +chrUn_JTFH01001627v1_decoy +chrUn_JTFH01001628v1_decoy +chrUn_JTFH01001629v1_decoy +chrUn_JTFH01001630v1_decoy +chrUn_JTFH01001631v1_decoy +chrUn_JTFH01001632v1_decoy +chrUn_JTFH01001633v1_decoy +chrUn_JTFH01001634v1_decoy +chrUn_JTFH01001635v1_decoy +chrUn_JTFH01001636v1_decoy +chrUn_JTFH01001637v1_decoy +chrUn_JTFH01001638v1_decoy +chrUn_JTFH01001639v1_decoy +chrUn_JTFH01001640v1_decoy +chrUn_JTFH01001641v1_decoy +chrUn_JTFH01001642v1_decoy +chrUn_JTFH01001643v1_decoy +chrUn_JTFH01001644v1_decoy +chrUn_JTFH01001645v1_decoy +chrUn_JTFH01001646v1_decoy +chrUn_JTFH01001647v1_decoy +chrUn_JTFH01001648v1_decoy +chrUn_JTFH01001649v1_decoy +chrUn_JTFH01001650v1_decoy +chrUn_JTFH01001651v1_decoy +chrUn_JTFH01001652v1_decoy +chrUn_JTFH01001653v1_decoy +chrUn_JTFH01001654v1_decoy +chrUn_JTFH01001655v1_decoy +chrUn_JTFH01001656v1_decoy +chrUn_JTFH01001657v1_decoy +chrUn_JTFH01001658v1_decoy +chrUn_JTFH01001659v1_decoy +chrUn_JTFH01001660v1_decoy +chrUn_JTFH01001661v1_decoy +chrUn_JTFH01001662v1_decoy +chrUn_JTFH01001663v1_decoy +chrUn_JTFH01001664v1_decoy +chrUn_JTFH01001665v1_decoy +chrUn_JTFH01001666v1_decoy +chrUn_JTFH01001667v1_decoy +chrUn_JTFH01001668v1_decoy +chrUn_JTFH01001669v1_decoy +chrUn_JTFH01001670v1_decoy +chrUn_JTFH01001671v1_decoy +chrUn_JTFH01001672v1_decoy +chrUn_JTFH01001673v1_decoy +chrUn_JTFH01001674v1_decoy +chrUn_JTFH01001675v1_decoy +chrUn_JTFH01001676v1_decoy +chrUn_JTFH01001677v1_decoy +chrUn_JTFH01001678v1_decoy +chrUn_JTFH01001679v1_decoy +chrUn_JTFH01001680v1_decoy +chrUn_JTFH01001681v1_decoy +chrUn_JTFH01001682v1_decoy +chrUn_JTFH01001683v1_decoy +chrUn_JTFH01001684v1_decoy +chrUn_JTFH01001685v1_decoy +chrUn_JTFH01001686v1_decoy +chrUn_JTFH01001687v1_decoy +chrUn_JTFH01001688v1_decoy +chrUn_JTFH01001689v1_decoy +chrUn_JTFH01001690v1_decoy +chrUn_JTFH01001691v1_decoy +chrUn_JTFH01001692v1_decoy +chrUn_JTFH01001693v1_decoy +chrUn_JTFH01001694v1_decoy +chrUn_JTFH01001695v1_decoy +chrUn_JTFH01001696v1_decoy 
+chrUn_JTFH01001697v1_decoy +chrUn_JTFH01001698v1_decoy +chrUn_JTFH01001699v1_decoy +chrUn_JTFH01001700v1_decoy +chrUn_JTFH01001701v1_decoy +chrUn_JTFH01001702v1_decoy +chrUn_JTFH01001703v1_decoy +chrUn_JTFH01001704v1_decoy +chrUn_JTFH01001705v1_decoy +chrUn_JTFH01001706v1_decoy +chrUn_JTFH01001707v1_decoy +chrUn_JTFH01001708v1_decoy +chrUn_JTFH01001709v1_decoy +chrUn_JTFH01001710v1_decoy +chrUn_JTFH01001711v1_decoy +chrUn_JTFH01001712v1_decoy +chrUn_JTFH01001713v1_decoy +chrUn_JTFH01001714v1_decoy +chrUn_JTFH01001715v1_decoy +chrUn_JTFH01001716v1_decoy +chrUn_JTFH01001717v1_decoy +chrUn_JTFH01001718v1_decoy +chrUn_JTFH01001719v1_decoy +chrUn_JTFH01001720v1_decoy +chrUn_JTFH01001721v1_decoy +chrUn_JTFH01001722v1_decoy +chrUn_JTFH01001723v1_decoy +chrUn_JTFH01001724v1_decoy +chrUn_JTFH01001725v1_decoy +chrUn_JTFH01001726v1_decoy +chrUn_JTFH01001727v1_decoy +chrUn_JTFH01001728v1_decoy +chrUn_JTFH01001729v1_decoy +chrUn_JTFH01001730v1_decoy +chrUn_JTFH01001731v1_decoy +chrUn_JTFH01001732v1_decoy +chrUn_JTFH01001733v1_decoy +chrUn_JTFH01001734v1_decoy +chrUn_JTFH01001735v1_decoy +chrUn_JTFH01001736v1_decoy +chrUn_JTFH01001737v1_decoy +chrUn_JTFH01001738v1_decoy +chrUn_JTFH01001739v1_decoy +chrUn_JTFH01001740v1_decoy +chrUn_JTFH01001741v1_decoy +chrUn_JTFH01001742v1_decoy +chrUn_JTFH01001743v1_decoy +chrUn_JTFH01001744v1_decoy +chrUn_JTFH01001745v1_decoy +chrUn_JTFH01001746v1_decoy +chrUn_JTFH01001747v1_decoy +chrUn_JTFH01001748v1_decoy +chrUn_JTFH01001749v1_decoy +chrUn_JTFH01001750v1_decoy +chrUn_JTFH01001751v1_decoy +chrUn_JTFH01001752v1_decoy +chrUn_JTFH01001753v1_decoy +chrUn_JTFH01001754v1_decoy +chrUn_JTFH01001755v1_decoy +chrUn_JTFH01001756v1_decoy +chrUn_JTFH01001757v1_decoy +chrUn_JTFH01001758v1_decoy +chrUn_JTFH01001759v1_decoy +chrUn_JTFH01001760v1_decoy +chrUn_JTFH01001761v1_decoy +chrUn_JTFH01001762v1_decoy +chrUn_JTFH01001763v1_decoy +chrUn_JTFH01001764v1_decoy +chrUn_JTFH01001765v1_decoy +chrUn_JTFH01001766v1_decoy +chrUn_JTFH01001767v1_decoy 
+chrUn_JTFH01001768v1_decoy +chrUn_JTFH01001769v1_decoy +chrUn_JTFH01001770v1_decoy +chrUn_JTFH01001771v1_decoy +chrUn_JTFH01001772v1_decoy +chrUn_JTFH01001773v1_decoy +chrUn_JTFH01001774v1_decoy +chrUn_JTFH01001775v1_decoy +chrUn_JTFH01001776v1_decoy +chrUn_JTFH01001777v1_decoy +chrUn_JTFH01001778v1_decoy +chrUn_JTFH01001779v1_decoy +chrUn_JTFH01001780v1_decoy +chrUn_JTFH01001781v1_decoy +chrUn_JTFH01001782v1_decoy +chrUn_JTFH01001783v1_decoy +chrUn_JTFH01001784v1_decoy +chrUn_JTFH01001785v1_decoy +chrUn_JTFH01001786v1_decoy +chrUn_JTFH01001787v1_decoy +chrUn_JTFH01001788v1_decoy +chrUn_JTFH01001789v1_decoy +chrUn_JTFH01001790v1_decoy +chrUn_JTFH01001791v1_decoy +chrUn_JTFH01001792v1_decoy +chrUn_JTFH01001793v1_decoy +chrUn_JTFH01001794v1_decoy +chrUn_JTFH01001795v1_decoy +chrUn_JTFH01001796v1_decoy +chrUn_JTFH01001797v1_decoy +chrUn_JTFH01001798v1_decoy +chrUn_JTFH01001799v1_decoy +chrUn_JTFH01001800v1_decoy +chrUn_JTFH01001801v1_decoy +chrUn_JTFH01001802v1_decoy +chrUn_JTFH01001803v1_decoy +chrUn_JTFH01001804v1_decoy +chrUn_JTFH01001805v1_decoy +chrUn_JTFH01001806v1_decoy +chrUn_JTFH01001807v1_decoy +chrUn_JTFH01001808v1_decoy +chrUn_JTFH01001809v1_decoy +chrUn_JTFH01001810v1_decoy +chrUn_JTFH01001811v1_decoy +chrUn_JTFH01001812v1_decoy +chrUn_JTFH01001813v1_decoy +chrUn_JTFH01001814v1_decoy +chrUn_JTFH01001815v1_decoy +chrUn_JTFH01001816v1_decoy +chrUn_JTFH01001817v1_decoy +chrUn_JTFH01001818v1_decoy +chrUn_JTFH01001819v1_decoy +chrUn_JTFH01001820v1_decoy +chrUn_JTFH01001821v1_decoy +chrUn_JTFH01001822v1_decoy +chrUn_JTFH01001823v1_decoy +chrUn_JTFH01001824v1_decoy +chrUn_JTFH01001825v1_decoy +chrUn_JTFH01001826v1_decoy +chrUn_JTFH01001827v1_decoy +chrUn_JTFH01001828v1_decoy +chrUn_JTFH01001829v1_decoy +chrUn_JTFH01001830v1_decoy +chrUn_JTFH01001831v1_decoy +chrUn_JTFH01001832v1_decoy +chrUn_JTFH01001833v1_decoy +chrUn_JTFH01001834v1_decoy +chrUn_JTFH01001835v1_decoy +chrUn_JTFH01001836v1_decoy +chrUn_JTFH01001837v1_decoy +chrUn_JTFH01001838v1_decoy 
+chrUn_JTFH01001839v1_decoy +chrUn_JTFH01001840v1_decoy +chrUn_JTFH01001841v1_decoy +chrUn_JTFH01001842v1_decoy +chrUn_JTFH01001843v1_decoy +chrUn_JTFH01001844v1_decoy +chrUn_JTFH01001845v1_decoy +chrUn_JTFH01001846v1_decoy +chrUn_JTFH01001847v1_decoy +chrUn_JTFH01001848v1_decoy +chrUn_JTFH01001849v1_decoy +chrUn_JTFH01001850v1_decoy +chrUn_JTFH01001851v1_decoy +chrUn_JTFH01001852v1_decoy +chrUn_JTFH01001853v1_decoy +chrUn_JTFH01001854v1_decoy +chrUn_JTFH01001855v1_decoy +chrUn_JTFH01001856v1_decoy +chrUn_JTFH01001857v1_decoy +chrUn_JTFH01001858v1_decoy +chrUn_JTFH01001859v1_decoy +chrUn_JTFH01001860v1_decoy +chrUn_JTFH01001861v1_decoy +chrUn_JTFH01001862v1_decoy +chrUn_JTFH01001863v1_decoy +chrUn_JTFH01001864v1_decoy +chrUn_JTFH01001865v1_decoy +chrUn_JTFH01001866v1_decoy +chrUn_JTFH01001867v1_decoy +chrUn_JTFH01001868v1_decoy +chrUn_JTFH01001869v1_decoy +chrUn_JTFH01001870v1_decoy +chrUn_JTFH01001871v1_decoy +chrUn_JTFH01001872v1_decoy +chrUn_JTFH01001873v1_decoy +chrUn_JTFH01001874v1_decoy +chrUn_JTFH01001875v1_decoy +chrUn_JTFH01001876v1_decoy +chrUn_JTFH01001877v1_decoy +chrUn_JTFH01001878v1_decoy +chrUn_JTFH01001879v1_decoy +chrUn_JTFH01001880v1_decoy +chrUn_JTFH01001881v1_decoy +chrUn_JTFH01001882v1_decoy +chrUn_JTFH01001883v1_decoy +chrUn_JTFH01001884v1_decoy +chrUn_JTFH01001885v1_decoy +chrUn_JTFH01001886v1_decoy +chrUn_JTFH01001887v1_decoy +chrUn_JTFH01001888v1_decoy +chrUn_JTFH01001889v1_decoy +chrUn_JTFH01001890v1_decoy +chrUn_JTFH01001891v1_decoy +chrUn_JTFH01001892v1_decoy +chrUn_JTFH01001893v1_decoy +chrUn_JTFH01001894v1_decoy +chrUn_JTFH01001895v1_decoy +chrUn_JTFH01001896v1_decoy +chrUn_JTFH01001897v1_decoy +chrUn_JTFH01001898v1_decoy +chrUn_JTFH01001899v1_decoy +chrUn_JTFH01001900v1_decoy +chrUn_JTFH01001901v1_decoy +chrUn_JTFH01001902v1_decoy +chrUn_JTFH01001903v1_decoy +chrUn_JTFH01001904v1_decoy +chrUn_JTFH01001905v1_decoy +chrUn_JTFH01001906v1_decoy +chrUn_JTFH01001907v1_decoy +chrUn_JTFH01001908v1_decoy +chrUn_JTFH01001909v1_decoy 
+chrUn_JTFH01001910v1_decoy +chrUn_JTFH01001911v1_decoy +chrUn_JTFH01001912v1_decoy +chrUn_JTFH01001913v1_decoy +chrUn_JTFH01001914v1_decoy +chrUn_JTFH01001915v1_decoy +chrUn_JTFH01001916v1_decoy +chrUn_JTFH01001917v1_decoy +chrUn_JTFH01001918v1_decoy +chrUn_JTFH01001919v1_decoy +chrUn_JTFH01001920v1_decoy +chrUn_JTFH01001921v1_decoy +chrUn_JTFH01001922v1_decoy +chrUn_JTFH01001923v1_decoy +chrUn_JTFH01001924v1_decoy +chrUn_JTFH01001925v1_decoy +chrUn_JTFH01001926v1_decoy +chrUn_JTFH01001927v1_decoy +chrUn_JTFH01001928v1_decoy +chrUn_JTFH01001929v1_decoy +chrUn_JTFH01001930v1_decoy +chrUn_JTFH01001931v1_decoy +chrUn_JTFH01001932v1_decoy +chrUn_JTFH01001933v1_decoy +chrUn_JTFH01001934v1_decoy +chrUn_JTFH01001935v1_decoy +chrUn_JTFH01001936v1_decoy +chrUn_JTFH01001937v1_decoy +chrUn_JTFH01001938v1_decoy +chrUn_JTFH01001939v1_decoy +chrUn_JTFH01001940v1_decoy +chrUn_JTFH01001941v1_decoy +chrUn_JTFH01001942v1_decoy +chrUn_JTFH01001943v1_decoy +chrUn_JTFH01001944v1_decoy +chrUn_JTFH01001945v1_decoy +chrUn_JTFH01001946v1_decoy +chrUn_JTFH01001947v1_decoy +chrUn_JTFH01001948v1_decoy +chrUn_JTFH01001949v1_decoy +chrUn_JTFH01001950v1_decoy +chrUn_JTFH01001951v1_decoy +chrUn_JTFH01001952v1_decoy +chrUn_JTFH01001953v1_decoy +chrUn_JTFH01001954v1_decoy +chrUn_JTFH01001955v1_decoy +chrUn_JTFH01001956v1_decoy +chrUn_JTFH01001957v1_decoy +chrUn_JTFH01001958v1_decoy +chrUn_JTFH01001959v1_decoy +chrUn_JTFH01001960v1_decoy +chrUn_JTFH01001961v1_decoy +chrUn_JTFH01001962v1_decoy +chrUn_JTFH01001963v1_decoy +chrUn_JTFH01001964v1_decoy +chrUn_JTFH01001965v1_decoy +chrUn_JTFH01001966v1_decoy +chrUn_JTFH01001967v1_decoy +chrUn_JTFH01001968v1_decoy +chrUn_JTFH01001969v1_decoy +chrUn_JTFH01001970v1_decoy +chrUn_JTFH01001971v1_decoy +chrUn_JTFH01001972v1_decoy +chrUn_JTFH01001973v1_decoy +chrUn_JTFH01001974v1_decoy +chrUn_JTFH01001975v1_decoy +chrUn_JTFH01001976v1_decoy +chrUn_JTFH01001977v1_decoy +chrUn_JTFH01001978v1_decoy +chrUn_JTFH01001979v1_decoy +chrUn_JTFH01001980v1_decoy 
+chrUn_JTFH01001981v1_decoy +chrUn_JTFH01001982v1_decoy +chrUn_JTFH01001983v1_decoy +chrUn_JTFH01001984v1_decoy +chrUn_JTFH01001985v1_decoy +chrUn_JTFH01001986v1_decoy +chrUn_JTFH01001987v1_decoy +chrUn_JTFH01001988v1_decoy +chrUn_JTFH01001989v1_decoy +chrUn_JTFH01001990v1_decoy +chrUn_JTFH01001991v1_decoy +chrUn_JTFH01001992v1_decoy +chrUn_JTFH01001993v1_decoy +chrUn_JTFH01001994v1_decoy +chrUn_JTFH01001995v1_decoy +chrUn_JTFH01001996v1_decoy +chrUn_JTFH01001997v1_decoy +chrUn_JTFH01001998v1_decoy +HLA-A*01:01:01:01 +HLA-A*01:01:01:02N +HLA-A*01:01:38L +HLA-A*01:02 +HLA-A*01:03 +HLA-A*01:04N +HLA-A*01:09 +HLA-A*01:11N +HLA-A*01:14 +HLA-A*01:16N +HLA-A*01:20 +HLA-A*02:01:01:01 +HLA-A*02:01:01:02L +HLA-A*02:01:01:03 +HLA-A*02:01:01:04 +HLA-A*02:02:01 +HLA-A*02:03:01 +HLA-A*02:03:03 +HLA-A*02:05:01 +HLA-A*02:06:01 +HLA-A*02:07:01 +HLA-A*02:10 +HLA-A*02:251 +HLA-A*02:259 +HLA-A*02:264 +HLA-A*02:265 +HLA-A*02:266 +HLA-A*02:269 +HLA-A*02:279 +HLA-A*02:32N +HLA-A*02:376 +HLA-A*02:43N +HLA-A*02:455 +HLA-A*02:48 +HLA-A*02:51 +HLA-A*02:533 +HLA-A*02:53N +HLA-A*02:57 +HLA-A*02:60:01 +HLA-A*02:65 +HLA-A*02:68 +HLA-A*02:77 +HLA-A*02:81 +HLA-A*02:89 +HLA-A*02:95 +HLA-A*03:01:01:01 +HLA-A*03:01:01:02N +HLA-A*03:01:01:03 +HLA-A*03:02:01 +HLA-A*03:11N +HLA-A*03:21N +HLA-A*03:36N +HLA-A*11:01:01 +HLA-A*11:01:18 +HLA-A*11:02:01 +HLA-A*11:05 +HLA-A*11:110 +HLA-A*11:25 +HLA-A*11:50Q +HLA-A*11:60 +HLA-A*11:69N +HLA-A*11:74 +HLA-A*11:75 +HLA-A*11:77 +HLA-A*23:01:01 +HLA-A*23:09 +HLA-A*23:38N +HLA-A*24:02:01:01 +HLA-A*24:02:01:02L +HLA-A*24:02:01:03 +HLA-A*24:02:03Q +HLA-A*24:02:10 +HLA-A*24:03:01 +HLA-A*24:07:01 +HLA-A*24:08 +HLA-A*24:09N +HLA-A*24:10:01 +HLA-A*24:11N +HLA-A*24:152 +HLA-A*24:20 +HLA-A*24:215 +HLA-A*24:61 +HLA-A*24:86N +HLA-A*25:01:01 +HLA-A*26:01:01 +HLA-A*26:11N +HLA-A*26:15 +HLA-A*26:50 +HLA-A*29:01:01:01 +HLA-A*29:01:01:02N +HLA-A*29:02:01:01 +HLA-A*29:02:01:02 +HLA-A*29:46 +HLA-A*30:01:01 +HLA-A*30:02:01:01 +HLA-A*30:02:01:02 +HLA-A*30:04:01 +HLA-A*30:89 
+HLA-A*31:01:02 +HLA-A*31:01:23 +HLA-A*31:04 +HLA-A*31:14N +HLA-A*31:46 +HLA-A*32:01:01 +HLA-A*32:06 +HLA-A*33:01:01 +HLA-A*33:03:01 +HLA-A*33:07 +HLA-A*34:01:01 +HLA-A*34:02:01 +HLA-A*36:01 +HLA-A*43:01 +HLA-A*66:01:01 +HLA-A*66:17 +HLA-A*68:01:01:01 +HLA-A*68:01:01:02 +HLA-A*68:01:02:01 +HLA-A*68:01:02:02 +HLA-A*68:02:01:01 +HLA-A*68:02:01:02 +HLA-A*68:02:01:03 +HLA-A*68:02:02 +HLA-A*68:03:01 +HLA-A*68:08:01 +HLA-A*68:113 +HLA-A*68:17 +HLA-A*68:18N +HLA-A*68:22 +HLA-A*68:71 +HLA-A*69:01 +HLA-A*74:01 +HLA-A*74:02:01:01 +HLA-A*74:02:01:02 +HLA-A*80:01:01:01 +HLA-A*80:01:01:02 +HLA-B*07:02:01 +HLA-B*07:05:01 +HLA-B*07:06 +HLA-B*07:156 +HLA-B*07:33:01 +HLA-B*07:41 +HLA-B*07:44 +HLA-B*07:50 +HLA-B*08:01:01 +HLA-B*08:08N +HLA-B*08:132 +HLA-B*08:134 +HLA-B*08:19N +HLA-B*08:20 +HLA-B*08:33 +HLA-B*08:79 +HLA-B*13:01:01 +HLA-B*13:02:01 +HLA-B*13:02:03 +HLA-B*13:02:09 +HLA-B*13:08 +HLA-B*13:15 +HLA-B*13:25 +HLA-B*14:01:01 +HLA-B*14:02:01 +HLA-B*14:07N +HLA-B*15:01:01:01 +HLA-B*15:01:01:02N +HLA-B*15:01:01:03 +HLA-B*15:02:01 +HLA-B*15:03:01 +HLA-B*15:04:01 +HLA-B*15:07:01 +HLA-B*15:108 +HLA-B*15:10:01 +HLA-B*15:11:01 +HLA-B*15:13:01 +HLA-B*15:16:01 +HLA-B*15:17:01:01 +HLA-B*15:17:01:02 +HLA-B*15:18:01 +HLA-B*15:220 +HLA-B*15:25:01 +HLA-B*15:27:01 +HLA-B*15:32:01 +HLA-B*15:42 +HLA-B*15:58 +HLA-B*15:66 +HLA-B*15:77 +HLA-B*15:83 +HLA-B*18:01:01:01 +HLA-B*18:01:01:02 +HLA-B*18:02 +HLA-B*18:03 +HLA-B*18:17N +HLA-B*18:26 +HLA-B*18:94N +HLA-B*27:04:01 +HLA-B*27:05:02 +HLA-B*27:05:18 +HLA-B*27:06 +HLA-B*27:07:01 +HLA-B*27:131 +HLA-B*27:24 +HLA-B*27:25 +HLA-B*27:32 +HLA-B*35:01:01:01 +HLA-B*35:01:01:02 +HLA-B*35:01:22 +HLA-B*35:02:01 +HLA-B*35:03:01 +HLA-B*35:05:01 +HLA-B*35:08:01 +HLA-B*35:14:02 +HLA-B*35:241 +HLA-B*35:41 +HLA-B*37:01:01 +HLA-B*37:01:05 +HLA-B*38:01:01 +HLA-B*38:02:01 +HLA-B*38:14 +HLA-B*39:01:01:01 +HLA-B*39:01:01:02L +HLA-B*39:01:01:03 +HLA-B*39:01:03 +HLA-B*39:01:16 +HLA-B*39:01:21 +HLA-B*39:05:01 +HLA-B*39:06:02 +HLA-B*39:10:01 +HLA-B*39:13:02 +HLA-B*39:14 
+HLA-B*39:34 +HLA-B*39:38Q +HLA-B*40:01:01 +HLA-B*40:01:02 +HLA-B*40:02:01 +HLA-B*40:03 +HLA-B*40:06:01:01 +HLA-B*40:06:01:02 +HLA-B*40:10:01 +HLA-B*40:150 +HLA-B*40:40 +HLA-B*40:72:01 +HLA-B*40:79 +HLA-B*41:01:01 +HLA-B*41:02:01 +HLA-B*42:01:01 +HLA-B*42:02 +HLA-B*42:08 +HLA-B*44:02:01:01 +HLA-B*44:02:01:02S +HLA-B*44:02:01:03 +HLA-B*44:02:17 +HLA-B*44:02:27 +HLA-B*44:03:01 +HLA-B*44:03:02 +HLA-B*44:04 +HLA-B*44:09 +HLA-B*44:138Q +HLA-B*44:150 +HLA-B*44:23N +HLA-B*44:26 +HLA-B*44:46 +HLA-B*44:49 +HLA-B*44:56N +HLA-B*45:01:01 +HLA-B*45:04 +HLA-B*46:01:01 +HLA-B*46:01:05 +HLA-B*47:01:01:01 +HLA-B*47:01:01:02 +HLA-B*48:01:01 +HLA-B*48:03:01 +HLA-B*48:04 +HLA-B*48:08 +HLA-B*49:01:01 +HLA-B*49:32 +HLA-B*50:01:01 +HLA-B*51:01:01 +HLA-B*51:01:02 +HLA-B*51:02:01 +HLA-B*51:07:01 +HLA-B*51:42 +HLA-B*52:01:01:01 +HLA-B*52:01:01:02 +HLA-B*52:01:01:03 +HLA-B*52:01:02 +HLA-B*53:01:01 +HLA-B*53:11 +HLA-B*54:01:01 +HLA-B*54:18 +HLA-B*55:01:01 +HLA-B*55:01:03 +HLA-B*55:02:01 +HLA-B*55:12 +HLA-B*55:24 +HLA-B*55:48 +HLA-B*56:01:01 +HLA-B*56:03 +HLA-B*56:04 +HLA-B*57:01:01 +HLA-B*57:03:01 +HLA-B*57:06 +HLA-B*57:11 +HLA-B*57:29 +HLA-B*58:01:01 +HLA-B*58:31N +HLA-B*59:01:01:01 +HLA-B*59:01:01:02 +HLA-B*67:01:01 +HLA-B*67:01:02 +HLA-B*67:02 +HLA-B*73:01 +HLA-B*78:01:01 +HLA-B*81:01 +HLA-B*82:02:01 +HLA-C*01:02:01 +HLA-C*01:02:11 +HLA-C*01:02:29 +HLA-C*01:02:30 +HLA-C*01:03 +HLA-C*01:06 +HLA-C*01:08 +HLA-C*01:14 +HLA-C*01:21 +HLA-C*01:30 +HLA-C*01:40 +HLA-C*02:02:02:01 +HLA-C*02:02:02:02 +HLA-C*02:10 +HLA-C*02:11 +HLA-C*02:16:02 +HLA-C*02:69 +HLA-C*02:85 +HLA-C*02:86 +HLA-C*02:87 +HLA-C*03:02:01 +HLA-C*03:02:02:01 +HLA-C*03:02:02:02 +HLA-C*03:02:02:03 +HLA-C*03:03:01 +HLA-C*03:04:01:01 +HLA-C*03:04:01:02 +HLA-C*03:04:02 +HLA-C*03:04:04 +HLA-C*03:05 +HLA-C*03:06 +HLA-C*03:100 +HLA-C*03:13:01 +HLA-C*03:20N +HLA-C*03:219 +HLA-C*03:261 +HLA-C*03:40:01 +HLA-C*03:41:02 +HLA-C*03:46 +HLA-C*03:61 +HLA-C*04:01:01:01 +HLA-C*04:01:01:02 +HLA-C*04:01:01:03 +HLA-C*04:01:01:04 +HLA-C*04:01:01:05 
+HLA-C*04:01:62 +HLA-C*04:03:01 +HLA-C*04:06 +HLA-C*04:09N +HLA-C*04:128 +HLA-C*04:161 +HLA-C*04:177 +HLA-C*04:70 +HLA-C*04:71 +HLA-C*05:01:01:01 +HLA-C*05:01:01:02 +HLA-C*05:08 +HLA-C*05:09:01 +HLA-C*05:93 +HLA-C*06:02:01:01 +HLA-C*06:02:01:02 +HLA-C*06:02:01:03 +HLA-C*06:23 +HLA-C*06:24 +HLA-C*06:46N +HLA-C*07:01:01:01 +HLA-C*07:01:01:02 +HLA-C*07:01:02 +HLA-C*07:01:19 +HLA-C*07:01:27 +HLA-C*07:01:45 +HLA-C*07:02:01:01 +HLA-C*07:02:01:02 +HLA-C*07:02:01:03 +HLA-C*07:02:01:04 +HLA-C*07:02:01:05 +HLA-C*07:02:05 +HLA-C*07:02:06 +HLA-C*07:02:64 +HLA-C*07:04:01 +HLA-C*07:04:02 +HLA-C*07:06 +HLA-C*07:149 +HLA-C*07:18 +HLA-C*07:19 +HLA-C*07:26 +HLA-C*07:30 +HLA-C*07:32N +HLA-C*07:384 +HLA-C*07:385 +HLA-C*07:386 +HLA-C*07:391 +HLA-C*07:392 +HLA-C*07:49 +HLA-C*07:56:02 +HLA-C*07:66 +HLA-C*07:67 +HLA-C*08:01:01 +HLA-C*08:01:03 +HLA-C*08:02:01:01 +HLA-C*08:02:01:02 +HLA-C*08:03:01 +HLA-C*08:04:01 +HLA-C*08:112 +HLA-C*08:20 +HLA-C*08:21 +HLA-C*08:22 +HLA-C*08:24 +HLA-C*08:27 +HLA-C*08:36N +HLA-C*08:40 +HLA-C*08:41 +HLA-C*08:62 +HLA-C*12:02:02 +HLA-C*12:03:01:01 +HLA-C*12:03:01:02 +HLA-C*12:08 +HLA-C*12:13 +HLA-C*12:19 +HLA-C*12:22 +HLA-C*12:99 +HLA-C*14:02:01 +HLA-C*14:03 +HLA-C*14:21N +HLA-C*14:23 +HLA-C*15:02:01 +HLA-C*15:05:01 +HLA-C*15:05:02 +HLA-C*15:13 +HLA-C*15:16 +HLA-C*15:17 +HLA-C*15:96Q +HLA-C*16:01:01 +HLA-C*16:02:01 +HLA-C*16:04:01 +HLA-C*17:01:01:01 +HLA-C*17:01:01:02 +HLA-C*17:01:01:03 +HLA-C*17:03 +HLA-C*18:01 +HLA-DQA1*01:01:02 +HLA-DQA1*01:02:01:01 +HLA-DQA1*01:02:01:02 +HLA-DQA1*01:02:01:03 +HLA-DQA1*01:02:01:04 +HLA-DQA1*01:03:01:01 +HLA-DQA1*01:03:01:02 +HLA-DQA1*01:04:01:01 +HLA-DQA1*01:04:01:02 +HLA-DQA1*01:05:01 +HLA-DQA1*01:07 +HLA-DQA1*01:10 +HLA-DQA1*01:11 +HLA-DQA1*02:01 +HLA-DQA1*03:01:01 +HLA-DQA1*03:02 +HLA-DQA1*03:03:01 +HLA-DQA1*04:01:02:01 +HLA-DQA1*04:01:02:02 +HLA-DQA1*04:02 +HLA-DQA1*05:01:01:01 +HLA-DQA1*05:01:01:02 +HLA-DQA1*05:03 +HLA-DQA1*05:05:01:01 +HLA-DQA1*05:05:01:02 +HLA-DQA1*05:05:01:03 +HLA-DQA1*05:11 +HLA-DQA1*06:01:01 
+HLA-DQB1*02:01:01 +HLA-DQB1*02:02:01 +HLA-DQB1*03:01:01:01 +HLA-DQB1*03:01:01:02 +HLA-DQB1*03:01:01:03 +HLA-DQB1*03:02:01 +HLA-DQB1*03:03:02:01 +HLA-DQB1*03:03:02:02 +HLA-DQB1*03:03:02:03 +HLA-DQB1*03:05:01 +HLA-DQB1*05:01:01:01 +HLA-DQB1*05:01:01:02 +HLA-DQB1*05:03:01:01 +HLA-DQB1*05:03:01:02 +HLA-DQB1*06:01:01 +HLA-DQB1*06:02:01 +HLA-DQB1*06:03:01 +HLA-DQB1*06:09:01 +HLA-DRB1*01:01:01 +HLA-DRB1*01:02:01 +HLA-DRB1*03:01:01:01 +HLA-DRB1*03:01:01:02 +HLA-DRB1*04:03:01 +HLA-DRB1*07:01:01:01 +HLA-DRB1*07:01:01:02 +HLA-DRB1*08:03:02 +HLA-DRB1*09:21 +HLA-DRB1*10:01:01 +HLA-DRB1*11:01:01 +HLA-DRB1*11:01:02 +HLA-DRB1*11:04:01 +HLA-DRB1*12:01:01 +HLA-DRB1*12:17 +HLA-DRB1*13:01:01 +HLA-DRB1*13:02:01 +HLA-DRB1*14:05:01 +HLA-DRB1*14:54:01 +HLA-DRB1*15:01:01:01 +HLA-DRB1*15:01:01:02 +HLA-DRB1*15:01:01:03 +HLA-DRB1*15:01:01:04 +HLA-DRB1*15:02:01 +HLA-DRB1*15:03:01:01 +HLA-DRB1*15:03:01:02 +HLA-DRB1*16:02:01 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_1_seg.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_1_seg.vcf new file mode 100644 index 00000000000..8eeb1cd1d31 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_1_seg.vcf @@ -0,0 +1,55 @@ +##fileformat=VCFv4.2 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr20 12558793 INS_chr20_12558793_12558809_CPX_DERIVED AAAAAAAAAAAAAAAAA . . 
CPX_EVENT=CPX_chr20:12558793-12558810;CTG_NAMES=asm027960:tig00003;END=12558809;SVLEN=133;SVTYPE=INS +chr20 18675720 INS_chr20_18675720_18675720_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:18675721-18675877;CTG_NAMES=asm028012:tig00004;END=18675720;SVLEN=408;SVTYPE=INS +chr20 20269131 DEL_chr20_20269131_20269198_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:20269131-20269199;CTG_NAMES=asm028026:tig00000;END=20269198;SVLEN=-67;SVTYPE=DEL +chr20 28561412 INS_chr20_28561412_28561412_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:28561412-28561576;CTG_NAMES=asm008294:tig00014;END=28561412;SVLEN=75;SVTYPE=INS +chr20 28561575 INS_chr20_28561575_28561575_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:28561412-28561576;CTG_NAMES=asm008294:tig00014;END=28561575;SVLEN=75;SVTYPE=INS +chr20 51740560 INS_chr20_51740560_51740560_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:51740560-51740561;CTG_NAMES=asm028558:tig00002,asm028558:tig00003;END=51740560;SVLEN=549;SVTYPE=INS +chr20 51740560 INS_chr20_51740560_51740560_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:51740560-51741035;CTG_NAMES=asm028558:tig00000,asm028558:tig00001;END=51740560;SVLEN=549;SVTYPE=INS +chr20 51741034 INS_chr20_51741034_51741034_CPX_DERIVED T . . CPX_EVENT=CPX_chr20:51740560-51741035;CTG_NAMES=asm028558:tig00000,asm028558:tig00001;END=51741034;SVLEN=50;SVTYPE=INS +chr20 54849491 INS_chr20_54849491_54849491_CPX_DERIVED C . . CPX_EVENT=CPX_chr20:54849491-54849615;CTG_NAMES=asm028586:tig00000;END=54849491;SVLEN=140;SVTYPE=INS +chr20 54849491 DEL_chr20_54849491_54849614_CPX_DERIVED C . . CPX_EVENT=CPX_chr20:54849491-54849615;CTG_NAMES=asm028586:tig00000;END=54849614;SVLEN=-123;SVTYPE=DEL +chr20 58695019 INS_chr20_58695019_58695019_CPX_DERIVED G . . CPX_EVENT=CPX_chr20:58695019-58695020;CTG_NAMES=asm028638:tig00002;END=58695019;SVLEN=549;SVTYPE=INS +chr20 64097041 INS_chr20_64097041_64097041_CPX_DERIVED A . . 
CPX_EVENT=CPX_chr20:64096905-64097041;CTG_NAMES=asm028821:tig00001;END=64097041;SVLEN=318;SVTYPE=INS diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_multi_seg.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_multi_seg.vcf new file mode 100644 index 00000000000..53d4b9be686 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/spark/sv/integration/outputs/cpx_reinterpreted_simple_multi_seg.vcf @@ -0,0 +1,69 @@ +##fileformat=VCFv4.2 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr20 2379968 INS_chr20_2379968_2379968_CPX_DERIVED G . . CPX_EVENT=CPX_chr20:2379056-2379968;CTG_NAMES=asm027854:tig00003;END=2379968;SVLEN=213;SVTYPE=INS +chr20 23122666 DEL_chr20_23122666_23122995_CPX_DERIVED C . . ALIGN_LENGTHS=876,876;CPX_EVENT=CPX_chr20:23122561-23122996;CTG_NAMES=asm028059:tig00000,asm028059:tig00001;END=23122995;HQ_MAPPINGS=2;INSLEN=36;INSSEQ=TTCACCTGCAAGCCCTCCCACACGGTGACATGACAG;INSSEQ_MAP=1254_1295_chr20:23122559-23122600_-_1253H42M874H_60_0_42_O,1254_1295_chr20:23122559-23122600_-_1253H42M874H_60_0_42_O;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=876;SEQ_ALT_HAPLOTYPE=TTCACCTGCAAGCCCTCCCACACGGTGACATGACAG;SVLEN=-329;SVTYPE=DEL;TOTAL_MAPPINGS=2 +chr20 38124003 INS-DUPLICATION-TANDEM-EXPANSION_chr20_38124003_38124003_CPX_DERIVED C . . 
ALIGN_LENGTHS=231,231;CPX_EVENT=CPX_chr20:38123803-38124181,CPX_chr20:38123803-38124181;CTG_NAMES=asm028411:tig00000,asm028411:tig00001;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:38124004-38124014;DUP_SEQ_CIGARS=11M,11M;END=38124003;EXPANSION;HQ_MAPPINGS=2;INSLEN=606;INSSEQ=TAATATACCTATTATATATAATATACCTATTATATATAAAATATACCTATTATATATAATATACCTATTATATACATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATAATATATTATATATAATATGCATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATAATATACATATTATATATTATATATTATATACATATTATATATTATATATTATATACATATTATATATTATATACATATTATATATTATATATTATATACATATTATACATTATATATATCTAAAATATATAATACACATTATATATTATATAATACACATTATATATAATATATAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATAATACACATTATA;INSSEQ_MAP=1523_1590_chr20:38123803-38123868_+_1522H31M2I35M2301H_60_3_43_O,1523_1590_chr20:38123803-38123868_+_1522H31M2I35M2301H_60_3_43_O;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=231;SEQ_ALT_HAPLOTYPE=TATTATATATATAATATACCTATTATATATAATATACCTATTATATATAAAATATACCTATTATATATAATATACCTATTATATACATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATATAATATACCTATTATATGCATTATATAATATATTATATATAATATGCATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATTATATATATAATATACATATTATATGTATTATATATTATATATAATATACATATTATATATTATATATTATATACATATTATATATTATATATTATATACATATTATATATTATATACATATTATATATTATATATTATATACATATTATACATTATATATATCTAAAATATATAATACACATTATATATTATATAATACACATTATATATAATATATAATACACATTATATATTATATATAATACACATTATATATTATATATAATACACATTATATATTATATAATACACATTATATATTATATATA;SVLEN=617;SVTYPE=INS;TOTAL_MAPPINGS=2 +chr20 38653053 INS_chr20_38653053_38653053_CPX_DERIVED A . . 
CPX_EVENT=CPX_chr20:38653054-38653283;CTG_NAMES=asm028418:tig00000;END=38653053;SVLEN=259;SVTYPE=INS +chr20 38653112 INS-DUPLICATION-TANDEM-EXPANSION_chr20_38653112_38653112_CPX_DERIVED A . . ALIGN_LENGTHS=224;CPX_EVENT=CPX_chr20:38653054-38653283;CTG_NAMES=asm028418:tig00000;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:38653113-38653268;DUP_SEQ_CIGARS=67M44I89M,57M21I99M;END=38653112;EXPANSION;HQ_MAPPINGS=1;INSLEN=135;INSSEQ=CTGGTGATGATAATGGTGGTGGTGGTGGTGATGGTGATGATGATTATGATGGTGGTGGTGGTGGTGGTGGTGCTGGTGATAGTGGTGGTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGATGATGGTGATG;INSSEQ_MAP=1546_1871_chr20:38653054-38653385_-_1545H128M15D64M6I105M3I20M1613H_59_53_100_O;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=224;SEQ_ALT_HAPLOTYPE=ATGATTGTGATGGTGGTGTTGGTGGTGGTGATAATGATGGTAGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGGTGATGGAAATGATGATGATGTTAATTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGGTGGTGTTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGGTGATGGTGATGATGATTATGATGGTGGTGGTGGTGGTGGTGGTGCTGGTGATAGTGGTGGTGGTGGTGCTGGTGATGATAATGGTGGTGGTGGTGATGATGGTGATGATGATTATGATGGTGGTGTTGGTGGTGCTGGTGATGATAATCATGCTGGTGGTGGTGGCGTTGATGATGGTGACAGTAGTGGTGATGATGGTGGTGGTGGTGATGGAAATGATGATGATGTTAGTTGTGGTGTTGATGATGGTAATGATAATGATTGTGATGATGGTGGTGGTGGTG;SVLEN=291;SVTYPE=DUP;TOTAL_MAPPINGS=1 +chr20 38653283 INS_chr20_38653283_38653283_CPX_DERIVED G . . CPX_EVENT=CPX_chr20:38653054-38653283;CTG_NAMES=asm028418:tig00000;END=38653283;SVLEN=175;SVTYPE=INS +chr20 47895195 DEL_chr20_47895195_47895293_CPX_DERIVED A . . CPX_EVENT=CPX_chr20:47895195-47895581;CTG_NAMES=asm028508:tig00005;END=47895293;SVLEN=-98;SVTYPE=DEL +chr20 47895482 DEL_chr20_47895482_47895580_CPX_DERIVED G . . ALIGN_LENGTHS=189;CPX_EVENT=CPX_chr20:47895195-47895581;CTG_NAMES=asm028508:tig00005;END=47895580;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=189;SVLEN=-98;SVTYPE=DEL;TOTAL_MAPPINGS=1 +chr20 61375462 INS-DUPLICATION-TANDEM-EXPANSION_chr20_61375462_61375462_CPX_DERIVED A . . 
ALIGN_LENGTHS=358;CPX_EVENT=CPX_chr20:61375650-61376102;CTG_NAMES=asm028687:tig00001;DUP_ANNOTATIONS_IMPRECISE;DUP_IMPRECISE_AFFECTED_RANGE=chr20:61375463-61376102;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:61375463-61375819;END=61375462;EXPANSION;HOMLEN=283;HOMSEQ=GTGATGGTGTGGTTTGTTGATGGTAGTGTGATGCTCTTGGTGCTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGATGATAGTGGGGTTTATTGATGGTAGTGTGATGGTCTTGGTGGTGCTGATAATGGTGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTGG;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=358;SEQ_ALT_HAPLOTYPE=GTGATGGTGTGGTTTATTGATGGCAGTGTGATTGTCTTGGTGGTGGTGATGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGATGGTGGCGCTGGTGGTGGTCATAGCACTGGTGGTGGTCATAGCACTGGTGGTGATGGTATGGTTTGTTGATGGTAGTGTGATGATCTTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTAGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGCTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGCTCTTGGTGCTGGTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGATGATGGTGGTGCTGGTGGTGGTCATAGCACTGGTGGTGATGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGATGATAGTGGGGTTTATTGATGGTAGTGTGATGGTCTTGGTGGTGCTGATAATGGTGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTGGCGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGGTGGTGATGATGTAGTTTGTTGATGGTAGCGTGATGTTCTTGGTGCTGGTGGTGGTGATGATGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGGTGGTGGTGATAGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGATGCTGGTGATGGTATTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTCGTGGTGGTGGTGTGGTTTGTCGATGGTAGTGTGGTGGTCTTGGTGCTGGTGG;SVLEN=357;SVTYPE=DUP;TOTAL_MAPPINGS=1 +chr20 61375649 INS-DUPLICATION-TANDEM-EXPANSION_chr20_61375649_61375649_CPX_DERIVED T . . 
ALIGN_LENGTHS=430;CPX_EVENT=CPX_chr20:61375650-61376102;CTG_NAMES=asm028687:tig00001;DUP_ANNOTATIONS_IMPRECISE;DUP_IMPRECISE_AFFECTED_RANGE=chr20:61375650-61376103;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:61375650-61375892;END=61375649;EXPANSION;HOMLEN=211;HOMSEQ=TGGTGGTGGTGATGATGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGGTGGTGGTGATAGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGATGCTGGTGATGGTATTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTCGTGGTGGTGGTGTGGTTTGTCGATGGTAGTGTGGTGGTCTTGGTGCTGGTGGT;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=430;SEQ_ALT_HAPLOTYPE=TGGTGGTGATGATAGTGGGGTTTATTGATGGTAGTGTGATGGTCTTGGTGGTGCTGATAATGGTGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTGGCGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTATGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGGTGGTGATGATGTAGTTTGTTGATGGTAGCGTGATGTTCTTGGTGCTGGTGGTGGTGATGATGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGGTGGTGGTGGTGGTGATAGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGATGCTGGTGATGGTATTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTCGTGGTGGTGGTGTGGTTTGTCGATGGTAGTGTGGTGGTCTTGGTGCTGGTGGTTGTCGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGTGGTGATGTGGTTTGTTGATGATAGTGTGATGGTCTTGGTGGTGGTAGTGATGGTGTGGTTTGCTGATGGTAGTGTGATGGTCTTGGTGGTGGTGATACTGTGGTTTGTGGATGGTAGTGTGATGGTCTTGGTGGTGATGGTGTGGTTTGTTGATGGTAGTGTGATGGTCTTGGTGCTGGTGGT;SVLEN=243;SVTYPE=DUP;TOTAL_MAPPINGS=1 +chr20 61375649 INS_chr20_61375649_61375649_CPX_DERIVED T . . CPX_EVENT=CPX_chr20:61375650-61376102;CTG_NAMES=asm028687:tig00001;END=61375649;SVLEN=572;SVTYPE=INS +chr20 61919907 INS-DUPLICATION-TANDEM-EXPANSION_chr20_61919907_61919907_CPX_DERIVED C . . 
ALIGN_LENGTHS=149;CPX_EVENT=CPX_chr20:61919906-61920109;CTG_NAMES=asm028707:tig00000;DUP_ANNOTATIONS_IMPRECISE;DUP_IMPRECISE_AFFECTED_RANGE=chr20:61919908-61920109;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:61919908-61920054;END=61919907;EXPANSION;HOMLEN=55;HOMSEQ=GTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTG;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=149;SEQ_ALT_HAPLOTYPE=GTGATTGTGTGGAAGTGTGATGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATTGGAAGTGTGGTGTCACGCTGATTGCATGGAAGTGTGTTGTGATTGTGTGGAAGCGTGATATCGCAGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGTGTGGAAGCGTGGTGTCACGGTGATTGCGTGGAAGCGTGTTGTGATTGTGTGGAAGCGTGGTATCGTGATCGGAAGCGTGGTGTTGCGGTGATTGCATGGAAGCATGTTGTGATTGTGTGGAAGCATGGTATCGTGATTGTCTGGAAGCATGGTGTCATGGTGATTGGAAGTGTGTCGTGATTG;SVLEN=147;SVTYPE=DUP;TOTAL_MAPPINGS=1 +chr20 61920109 INS_chr20_61920109_61920109_CPX_DERIVED G . . CPX_EVENT=CPX_chr20:61919906-61920109;CTG_NAMES=asm028707:tig00000;END=61920109;SVLEN=531;SVTYPE=INS +chr20 62452037 INS-DUPLICATION-TANDEM-EXPANSION_chr20_62452037_62452037_CPX_DERIVED A . . ALIGN_LENGTHS=59;CPX_EVENT=CPX_chr20:62452038-62452236;CTG_NAMES=asm028732:tig00001;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr20:62452038-62452041;DUP_SEQ_CIGARS=4M,4M;END=62452037;EXPANSION;HQ_MAPPINGS=1;INSLEN=149;INSSEQ=GGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGAGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=59;SEQ_ALT_HAPLOTYPE=GGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGAGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG;SVLEN=153;SVTYPE=INS;TOTAL_MAPPINGS=1 +chr20 62452087 DEL_chr20_62452087_62452235_CPX_DERIVED A . . 
ALIGN_LENGTHS=50;CPX_EVENT=CPX_chr20:62452038-62452236;CTG_NAMES=asm028732:tig00001;END=62452235;HOMLEN=9;HOMSEQ=GAAGGAGAG;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=50;SVLEN=-148;SVTYPE=DEL;TOTAL_MAPPINGS=1 +chr20 63092255 INS_chr20_63092255_63092255_CPX_DERIVED G . . ALIGN_LENGTHS=1091;CPX_EVENT=CPX_chr20:63092255-63094246;CTG_NAMES=asm028762:tig00002;END=63092255;HQ_MAPPINGS=1;INSLEN=71;INSSEQ=GCCCAGGTTCCCGGGGCTGCGTGGGAGACACAGAAGTGGGGGCACCTCCGCAGCACCCACATCCTGCCGAT;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=1091;SEQ_ALT_HAPLOTYPE=GCCCAGGTTCCCGGGGCTGCGTGGGAGACACAGAAGTGGGGGCACCTCCGCAGCACCCACATCCTGCCGAT;SVLEN=71;SVTYPE=INS;TOTAL_MAPPINGS=1 +chr20 63093346 DEL_chr20_63093346_63094245_CPX_DERIVED G . . ALIGN_LENGTHS=942;CPX_EVENT=CPX_chr20:63092255-63094246;CTG_NAMES=asm028762:tig00002;END=63094245;HQ_MAPPINGS=1;INSLEN=1;INSSEQ=T;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=942;SEQ_ALT_HAPLOTYPE=T;SVLEN=-899;SVTYPE=DEL;TOTAL_MAPPINGS=1 +chr20 63353949 DEL_chr20_63353949_63354001_CPX_DERIVED T . . ALIGN_LENGTHS=80;CPX_EVENT=CPX_chr20:63353949-63354272;CTG_NAMES=asm028777:tig00001;END=63354001;HQ_MAPPINGS=1;MAPPING_QUALITIES=60;MAX_ALIGN_LENGTH=80;SVLEN=-52;SVTYPE=DEL;TOTAL_MAPPINGS=1 +chr20 63354205 DEL_chr20_63354205_63354271_CPX_DERIVED G . . CPX_EVENT=CPX_chr20:63353949-63354272;CTG_NAMES=asm028777:tig00001;END=63354271;SVLEN=-66;SVTYPE=DEL +chr21 21264943 INS_chr21_21264943_21264943_CPX_DERIVED G . . CPX_EVENT=CPX_chr21:21264944-21265096;CTG_NAMES=asm029034:tig00000,asm029034:tig00001;END=21264943;SVLEN=221;SVTYPE=INS +chr21 21264961 INS-DUPLICATION-TANDEM-EXPANSION_chr21_21264961_21264961_CPX_DERIVED G . . 
ALIGN_LENGTHS=110,110;CPX_EVENT=CPX_chr21:21264944-21265096,CPX_chr21:21264944-21265096;CTG_NAMES=asm029034:tig00000,asm029034:tig00001;DUP_ANNOTATIONS_IMPRECISE;DUP_IMPRECISE_AFFECTED_RANGE=chr21:21264962-21265096;DUP_NUM=1,2;DUP_ORIENTATIONS=++;DUP_REPEAT_UNIT_REF_SPAN=chr21:21264962-21265070;END=21264961;EXPANSION;HOMLEN=26;HOMSEQ=TATATATACACATATATATTATATAT;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=110;SEQ_ALT_HAPLOTYPE=TATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATGTGTATATATATACACATATATATTATATATGTGTATATGTATATATACACATATATATTATATATATATGTGTCTGTATATATATACACATATATATTATATAT;SVLEN=109;SVTYPE=DUP;TOTAL_MAPPINGS=2 +chr21 23428920 INS_chr21_23428920_23428920_CPX_DERIVED T . . CPX_EVENT=CPX_chr21:23428920-23429023;CTG_NAMES=asm029052:tig00000,asm029052:tig00001;END=23428920;SVLEN=85;SVTYPE=INS +chr21 23428920 INS_chr21_23428920_23428967_CPX_DERIVED TTTATATAAATATATATAAATATATAATATATAATAATATAATATAAT . . ALIGN_LENGTHS=56,56;CPX_EVENT=CPX_chr21:23428920-23429023,CPX_chr21:23428920-23429023;CTG_NAMES=asm029052:tig00000,asm029052:tig00001;END=23428967;HQ_MAPPINGS=2;INSLEN=85;INSSEQ=ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=56;SEQ_ALT_HAPLOTYPE=ATAAATATATATAATAATATATAATTATATATTATATTATATAATATAATAATATATATTATAATACATTATATAATATATTATA;SVLEN=85;SVTYPE=INS;TOTAL_MAPPINGS=2 +chr21 26001843 INV_chr21_26001843_26002384_CPX_DERIVED T . . CPX_EVENT=CPX_chr21:26001843-26002386;CTG_NAMES=asm029075:tig00000;END=26002384;SVLEN=0;SVTYPE=INV +chr21 46069065 INS_chr21_46069065_46069065_CPX_DERIVED C . . 
ALIGN_LENGTHS=91,91;CPX_EVENT=CPX_chr21:46069065-46069209;CTG_NAMES=asm029362:tig00001,asm029362:tig00002;END=46069065;HQ_MAPPINGS=2;INSLEN=60;INSSEQ=CTAGGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGCACACGTGTGT;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=91;SEQ_ALT_HAPLOTYPE=CTAGGTGTGTGCATGTGTGCACACGTGTGTGCATGTGTGTGCATGTGTGCACACGTGTGT;SVLEN=60;SVTYPE=INS;TOTAL_MAPPINGS=2 +chr21 46069156 DEL_chr21_46069156_46069208_CPX_DERIVED A . . ALIGN_LENGTHS=91,91;CPX_EVENT=CPX_chr21:46069065-46069209;CTG_NAMES=asm029362:tig00001,asm029362:tig00002;END=46069208;HQ_MAPPINGS=2;MAPPING_QUALITIES=60,60;MAX_ALIGN_LENGTH=91;SVLEN=-52;SVTYPE=DEL;TOTAL_MAPPINGS=2