Skip to content

Commit

Permalink
init integration test
Browse files Browse the repository at this point in the history
  • Loading branch information
SHuang-Broad committed May 15, 2018
1 parent fedda69 commit b93d584
Showing 1 changed file with 160 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package org.broadinstitute.hellbender.tools.spark.sv.integration;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource;
import org.broadinstitute.hellbender.engine.datasources.ReferenceWindowFunctions;
import org.broadinstitute.hellbender.engine.spark.SparkContextFactory;
import org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSource;
import org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource;
import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.AnnotatedVariantProducer;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.SegmentedCpxVariantSimpleVariantExtractor;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVVCFWriter;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder;
import org.broadinstitute.hellbender.utils.test.MiniClusterUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Integration tests around extracting simple variants from segmented complex ("cpx") variant calls.
 *
 * <p>All tests in this class are currently disabled ({@code enabled = false}) and the data provider
 * feeding the command-line tests returns no rows, so nothing here runs yet.
 */
public class CpxVariantReInterpreterSparkIntegrationTest extends CommandLineProgramTest {

    // NOTE(review): the four paths below are absolute paths on a developer machine; the test that
    // reads them cannot run anywhere else until they point at checked-in test resources.
    private static final String complexVCF = "/Users/shuang/Desktop/callsets/reInterpret/accumulate.vcf";
    private static final String assemblyAlignmentsAccompanyingComplexVCF = "/Users/shuang/Desktop/callsets/reInterpret/accumulate.sam";
    private static final String twoBitRefURL = "/Users/shuang/Ref/Homo_sapiens_assembly38.2bit";
    private static final String nonCanonicalChromosomeNamesFile = "/Users/shuang/Project/SV/Misc/Homo_sapiens_assembly38.kill.alts";

    // INFO annotations handed to the VCF equivalence check as the ones to ignore
    private static final List<String> annotationsToIgnoreWhenComparingVariants =
            Arrays.asList(GATKSVVCFConstants.ALIGN_LENGTHS,
                    GATKSVVCFConstants.CONTIG_NAMES,
                    GATKSVVCFConstants.INSERTED_SEQUENCE_MAPPINGS,
                    GATKSVVCFConstants.TOTAL_MAPPINGS,
                    GATKSVVCFConstants.SPLIT_READ_SUPPORT,
                    GATKSVVCFConstants.READ_PAIR_SUPPORT);

    // NOTE(review): empty placeholder; looks like it is meant to become the path of the expected VCF.
    private static final String expectedSimpleVCF = "";

    /**
     * Reads multi-segment complex variant calls and their accompanying assembly alignments from
     * files, runs {@link SegmentedCpxVariantSimpleVariantExtractor#extract}, and compares the
     * coordinate-sorted multi-segment re-interpretations against the variants read from
     * {@code expectedSimpleVCF}. Also asserts that no zero-or-one-segment re-interpretations
     * were produced.
     */
    @Test(groups = "sv", enabled = false)
    public void integrativeTestExtractingSimpleVariantsFromMultiSegmentCalls() {
        final Logger localLogger = LogManager.getLogger(AnnotatedVariantProducer.class);
        final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

        // inputs: complex calls, plus the alignments and header they were made from
        final JavaRDD<VariantContext> complexVariants =
                new VariantsSparkSource(ctx).getParallelVariantContexts(complexVCF, null);
        final ReadsSparkSource readsSparkSource = new ReadsSparkSource(ctx);
        final JavaRDD<GATKRead> assemblyAlignments =
                readsSparkSource.getParallelReads(assemblyAlignmentsAccompanyingComplexVCF, twoBitRefURL);
        final SAMFileHeader headerForReads =
                readsSparkSource.getHeader(assemblyAlignmentsAccompanyingComplexVCF, twoBitRefURL);
        final ReferenceMultiSource reference =
                new ReferenceMultiSource(twoBitRefURL, ReferenceWindowFunctions.IDENTITY_FUNCTION);

        final StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs =
                new StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection();
        final String output = IOUtils.createTempFile("cpxReinterpretation", ".vcf").getAbsolutePath();
        final SvDiscoveryInputMetaData svDiscoveryInputMetaData =
                new SvDiscoveryInputMetaData(ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile,
                        output, null, null, null, null,
                        headerForReads, reference, localLogger);

        try (final FeatureDataSource<VariantContext> dataSource =
                     new FeatureDataSource<>(expectedSimpleVCF, null, 0, VariantContext.class)) {

            final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants extract =
                    SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariants, svDiscoveryInputMetaData, assemblyAlignments);

            Assert.assertTrue(extract.getReInterpretZeroOrOneSegmentCalls().isEmpty());

            // sort both sides the same way so the list comparison does not depend on input order
            final SAMSequenceDictionary sequenceDictionary = headerForReads.getSequenceDictionary();
            final List<VariantContext> actual = SVVCFWriter
                    .sortVariantsByCoordinate(extract.getReInterpretMultiSegmentsCalls(), sequenceDictionary);
            final List<VariantContext> expected = SVVCFWriter
                    .sortVariantsByCoordinate(Utils.stream(dataSource.iterator()).collect(Collectors.toList()), sequenceDictionary);
            Assert.assertEquals(actual, expected);
        }
    }

    /**
     * Holds the output directory for one command-line test case and builds the matching
     * command line.
     */
    private static final class CpxVariantReInterpreterSparkIntegrationTestArgs {
        final String outputDir;

        CpxVariantReInterpreterSparkIntegrationTestArgs(final String outputDir) {
            this.outputDir = outputDir;
        }

        /**
         * @return a whitespace-separated command line with {@code -R}, {@code -I} and {@code -O}
         *         arguments, the last one located under {@link #outputDir}
         */
        String getCommandLine() {
            return  " -R " + SVIntegrationTestDataProvider.reference_2bit +
                    " -I " + SVIntegrationTestDataProvider.TEST_CONTIG_SAM +
                    " -O " + outputDir + "/DiscoverVariantsFromContigAlignmentsSAMSparkIntegrationTest";
        }
    }

    /**
     * Data provider for the command-line tests below.
     *
     * @return the test cases; currently an empty array because no case is added to the list
     */
    @DataProvider(name = "forCpxVariantReInterpreterSparkIntegrationTest")
    private Object[][] createData() {
        final List<Object[]> data = new ArrayList<>();

        return data.toArray(new Object[data.size()][]);
    }

    /**
     * Runs the tool on the local file system with the arguments built by {@code params},
     * then invokes the shared VCF equivalence check.
     *
     * @param params supplies the command line to run
     * @throws Exception propagated from the equivalence check
     */
    @Test(groups = "sv", dataProvider = "forCpxVariantReInterpreterSparkIntegrationTest", enabled = false)
    public void testRunLocal(final CpxVariantReInterpreterSparkIntegrationTestArgs params) throws Exception {
        final List<String> args = Arrays.asList( new ArgumentsBuilder().add(params.getCommandLine()).getArgsArray() );
        runCommandLine(args);

        // NOTE(review): the first argument is the empty placeholder `expectedSimpleVCF`, whereas
        // testRunHDFS passes a path derived from its -O value in the same position; presumably
        // this should be the VCF produced by the run above -- TODO confirm.
        StructuralVariationDiscoveryPipelineSparkIntegrationTest.svDiscoveryVCFEquivalenceTest(expectedSimpleVCF,
                SVIntegrationTestDataProvider.EXPECTED_SIMPLE_DEL_VCF, null,
                annotationsToIgnoreWhenComparingVariants, false);
    }

    /**
     * Same as {@link #testRunLocal} but on an isolated mini cluster: the {@code -I} and {@code -R}
     * inputs are first copied into the cluster's working directory and the arguments are rewritten
     * to point at the copies; {@code -O} is redirected into the same working directory.
     *
     * @param params supplies the command line to run
     * @throws Exception propagated from the mini-cluster run
     */
    @Test(groups = "sv", dataProvider = "forCpxVariantReInterpreterSparkIntegrationTest", enabled = false)
    public void testRunHDFS(final CpxVariantReInterpreterSparkIntegrationTestArgs params) throws Exception {
        MiniClusterUtils.runOnIsolatedMiniCluster(cluster -> {

            // Arrays.asList gives a fixed-size view over the array; set(...) is supported, add/remove are not
            final List<String> argsToBeModified = Arrays.asList( new ArgumentsBuilder().add(params.getCommandLine()).getArgsArray() );
            final Path workingDirectory = MiniClusterUtils.getWorkingDir(cluster);

            // inputs: {flag, file name to use inside the working directory}, staged in this order
            final String[][] inputsToStage = { {"-I", "hdfs.sam"}, {"-R", "reference.2bit"} };
            for (final String[] flagAndHdfsName : inputsToStage) {
                final int valueIdx = indexOfValueFor(argsToBeModified, flagAndHdfsName[0]);
                final Path hdfsPath = new Path(workingDirectory, flagAndHdfsName[1]);
                final File localFile = new File(argsToBeModified.get(valueIdx));
                cluster.getFileSystem().copyFromLocalFile(new Path(localFile.toURI()), hdfsPath);
                argsToBeModified.set(valueIdx, hdfsPath.toUri().toString());
            }

            // outputs, prefix with hdfs address
            final int outputValueIdx = indexOfValueFor(argsToBeModified, "-O");
            final Path outputPrefix = new Path(workingDirectory, "test");
            final String vcfOnHDFS = outputPrefix.toUri().toString() + "_sample_inv_del_ins.vcf";
            argsToBeModified.set(outputValueIdx, outputPrefix.toUri().toString());

            runCommandLine(argsToBeModified);
            StructuralVariationDiscoveryPipelineSparkIntegrationTest.svDiscoveryVCFEquivalenceTest(
                    vcfOnHDFS,
                    SVIntegrationTestDataProvider.EXPECTED_SIMPLE_DEL_VCF,
                    null,
                    annotationsToIgnoreWhenComparingVariants,
                    true);
        });
    }

    /**
     * Locates the value that follows {@code flag} in {@code args}.
     *
     * <p>Fails the test when the flag is absent or is the last element, instead of letting a
     * {@code -1} from {@code indexOf} silently address (and later overwrite) the first argument.
     *
     * @param args the argument list to search
     * @param flag the flag whose value is wanted, e.g. {@code "-I"}
     * @return index in {@code args} of the element immediately after {@code flag}
     */
    private static int indexOfValueFor(final List<String> args, final String flag) {
        final int flagIdx = args.indexOf(flag);
        Assert.assertTrue(flagIdx >= 0, "Flag " + flag + " not found in arguments: " + args);
        Assert.assertTrue(flagIdx + 1 < args.size(), "Flag " + flag + " has no value in arguments: " + args);
        return flagIdx + 1;
    }
}

0 comments on commit b93d584

Please sign in to comment.