Skip to content

Commit

Permalink
Add support for block gzipped files with a .bgz suffix (as well as
a .gz suffix), see HadoopGenomics/Hadoop-BAM#106.
Browse files Browse the repository at this point in the history

Also remove spurious debug.
  • Loading branch information
tomwhite committed Jul 7, 2016
1 parent 575984f commit 2167f0b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.seqdoop.hadoop_bam.VCFInputFormat;
import org.seqdoop.hadoop_bam.VariantContextWritable;
import org.seqdoop.hadoop_bam.util.BGZFCodec;
import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec;

import java.util.List;

Expand Down Expand Up @@ -61,7 +62,8 @@ public JavaRDD<GATKVariant> getParallelVariants(final List<String> vcfs, final L
*/
public JavaRDD<VariantContext> getParallelVariantContexts(final String vcf, final List<SimpleInterval> intervals) {
Configuration conf = new Configuration();
conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName());
conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(),
BGZFCodec.class.getCanonicalName());
if (intervals != null && !intervals.isEmpty()) {
VCFInputFormat.setIntervals(conf, intervals);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
package org.broadinstitute.hellbender.tools.spark.pipelines;

import htsjdk.samtools.util.BlockCompressedInputStream;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintStream;

public final class CountVariantsSparkIntegrationTest extends CommandLineProgramTest {

Expand Down Expand Up @@ -64,9 +63,6 @@ public Object[][] intervals(){

@Test(dataProvider = "intervals", groups = "spark")
public void testCountVariantsWithIntervals(final File fileIn, final String intervalArgs, final long expected) throws Exception {

File f = new File(largeFileTestDir, "dbsnp_138.b37.20.21.vcf.blockgz.gz");
System.out.println("tw: " + BlockCompressedInputStream.isValidFile(new BufferedInputStream(new FileInputStream(f))));
final File outputTxt = createTempFile("count_variants", ".txt");
ArgumentsBuilder args = new ArgumentsBuilder();
args.addVCF(fileIn);
Expand All @@ -75,8 +71,20 @@ public void testCountVariantsWithIntervals(final File fileIn, final String inter
args.addOutput(outputTxt);
this.runCommandLine(args.getArgsArray());

final String readIn = FileUtils.readFileToString(outputTxt.getAbsoluteFile());
Assert.assertEquals((int)Integer.valueOf(readIn), expected);
final ByteArrayOutputStream baosErr = new ByteArrayOutputStream();
final PrintStream err = System.err;
try {
System.setErr(new PrintStream(baosErr));

this.runCommandLine(args.getArgsArray());

final String readIn = FileUtils.readFileToString(outputTxt.getAbsoluteFile());
Assert.assertEquals((int)Integer.valueOf(readIn), expected);
String errString = baosErr.toString();
Assert.assertFalse(errString.contains("Warning: using GzipCodec, which is not splittable,"), errString);
} finally {
System.setErr(err); //put this back in
}
}

@Test(groups = "spark")
Expand Down

0 comments on commit 2167f0b

Please sign in to comment.