Skip to content

Commit

Permalink
Add support for block gzipped files with a .bgz suffix (as well as a .gz suffix), see HadoopGenomics/Hadoop-BAM#106.
Browse files Browse the repository at this point in the history

Also remove spurious debug output.
  • Loading branch information
tomwhite committed Jul 1, 2016
1 parent 3f08383 commit a4c8e5b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.seqdoop.hadoop_bam.VCFInputFormat;
import org.seqdoop.hadoop_bam.VariantContextWritable;
import org.seqdoop.hadoop_bam.util.BGZFCodec;
import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec;

import java.util.List;

Expand Down Expand Up @@ -61,7 +62,8 @@ public JavaRDD<GATKVariant> getParallelVariants(final List<String> vcfs, final L
*/
public JavaRDD<VariantContext> getParallelVariantContexts(final String vcf, final List<SimpleInterval> intervals) {
Configuration conf = new Configuration();
conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName());
conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(),
BGZFCodec.class.getCanonicalName());
if (intervals != null && !intervals.isEmpty()) {
VCFInputFormat.setIntervals(conf, intervals);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
package org.broadinstitute.hellbender.tools.spark.pipelines;

import htsjdk.samtools.util.BlockCompressedInputStream;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;

public final class CountVariantsSparkIntegrationTest extends CommandLineProgramTest {

Expand Down Expand Up @@ -64,9 +61,6 @@ public Object[][] intervals(){

@Test(dataProvider = "intervals", groups = "spark")
public void testCountVariantsWithIntervals(final File fileIn, final String intervalArgs, final long expected) throws Exception {

File f = new File(largeFileTestDir, "dbsnp_138.b37.20.21.vcf.blockgz.gz");
System.out.println("tw: " + BlockCompressedInputStream.isValidFile(new BufferedInputStream(new FileInputStream(f))));
final File outputTxt = createTempFile("count_variants", ".txt");
ArgumentsBuilder args = new ArgumentsBuilder();
args.addVCF(fileIn);
Expand Down

0 comments on commit a4c8e5b

Please sign in to comment.