diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSR.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSR.java index f172b6d5713..2b74d498531 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSR.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSR.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.bqsr; +import java.nio.file.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.Argument; @@ -14,6 +15,7 @@ import org.broadinstitute.hellbender.transformers.BQSRReadTransformer; import org.broadinstitute.hellbender.transformers.ReadTransformer; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; @@ -75,7 +77,7 @@ public final class ApplyBQSR extends ReadWalker{ private static final Logger logger = LogManager.getLogger(ApplyBQSR.class); @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="Write output to this file") - public File OUTPUT; + public String OUTPUT; /** * This argument is required for recalibration of base qualities. The recalibration table is a file produced by @@ -103,7 +105,7 @@ public ReadTransformer makePostReadFilterTransformer(){ @Override public void onTraversalStart() { - outputWriter = createSAMWriter(OUTPUT, true); + outputWriter = createSAMWriter(IOUtils.getPath(OUTPUT), true); Utils.warnOnNonIlluminaReadGroups(getHeaderForReads(), logger); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/Utils.java b/src/main/java/org/broadinstitute/hellbender/utils/Utils.java index 0678cd24dad..45382531891 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/Utils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/Utils.java @@ -5,6 +5,9 @@ import com.google.common.primitives.Ints; import htsjdk.samtools.SAMFileHeader; import htsjdk.tribble.util.ParsingUtils; +import java.io.FileNotFoundException; +import java.nio.file.Files; +import java.nio.file.Path; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; @@ -524,12 +527,40 @@ public static String calcMD5(final byte[] bytes) { /** * Calculates the MD5 for the specified file and returns it as a String * + * Warning: this loads the whole file into memory, so it's not suitable + * for large files. + * * @param file file whose MD5 to calculate * @return file's MD5 in String form * @throws IOException if the file could not be read */ public static String calculateFileMD5( final File file ) throws IOException{ - return Utils.calcMD5(FileUtils.readFileToByteArray(file)); + return calculatePathMD5(file.toPath()); + } + + /** + * Calculates the MD5 for the specified file and returns it as a String + * + * Warning: this loads the whole file into memory, so it's not suitable + * for large files. + * + * @param path file whose MD5 to calculate + * @return file's MD5 in String form + * @throws IOException if the file could not be read + */ + public static String calculatePathMD5(final Path path) throws IOException{ + // This doesn't have as nice error messages as FileUtils, but it's close. + String fname = path.toUri().toString(); + if (!Files.exists(path)) { + throw new FileNotFoundException("File '" + fname + "' does not exist"); + } + if (Files.isDirectory(path)) { + throw new IOException("File '" + fname + "' exists but is a directory"); + } + if (!Files.isRegularFile(path)) { + throw new IOException("File '" + fname + "' exists but is not a regular file"); + } + return Utils.calcMD5(Files.readAllBytes(path)); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/ReadUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/ReadUtils.java index e7036afaff7..8cbb3aa7d8e 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/ReadUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/ReadUtils.java @@ -14,10 +14,8 @@ import htsjdk.samtools.SAMUtils; import htsjdk.samtools.SamStreams; import htsjdk.samtools.cram.build.CramIO; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; +import java.nio.file.Files; import java.nio.file.OpenOption; import java.nio.file.Path; import java.util.Arrays; @@ -1131,19 +1129,31 @@ public static SAMFileWriter createCommonSAMWriterFromFactory( * Validate that a file has CRAM contents by checking that it has a valid CRAM file header * (no matter what the extension). * - * @param putativeCRAMFile File to check. + * @param putativeCRAMPath File to check. * @return true if the file has a valid CRAM file header, otherwise false */ - public static boolean hasCRAMFileContents(final File putativeCRAMFile) { - try (final FileInputStream fileStream = new FileInputStream(putativeCRAMFile); - final BufferedInputStream bis = new BufferedInputStream(fileStream)) { - return SamStreams.isCRAMFile(bis); + public static boolean hasCRAMFileContents(final Path putativeCRAMPath) { + try (final InputStream fileStream = Files.newInputStream(putativeCRAMPath)) { + try (final BufferedInputStream bis = new BufferedInputStream(fileStream)) { + return SamStreams.isCRAMFile(bis); + } } catch (IOException e) { throw new UserException.CouldNotReadInputFile(e.getMessage()); } } + /** + * Validate that a file has CRAM contents by checking that it has a valid CRAM file header + * (no matter what the extension). + * + * @param putativeCRAMFile File to check. + * @return true if the file has a valid CRAM file header, otherwise false + */ + public static boolean hasCRAMFileContents(final File putativeCRAMFile) { + return hasCRAMFileContents(putativeCRAMFile.toPath()); + } + public static boolean isNonPrimary(GATKRead read) { return read.isSecondaryAlignment() || read.isSupplementaryAlignment() || read.isUnmapped(); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtils.java index 94b54820e6d..c642c235d43 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtils.java @@ -14,6 +14,7 @@ import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedHashMap; @@ -36,7 +37,9 @@ public final class SamAssertionUtils { private static SamReader getReader(final File sam, final ValidationStringency validationStringency, final File reference) { return SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference).open(sam); } - + private static SamReader getReader(final Path sam, final ValidationStringency validationStringency, final Path reference) { + return SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference).open(sam); + } /** * causes an exception if the given sam files aren't equal * @param actualSam the actual file @@ -45,8 +48,12 @@ private static SamReader getReader(final File sam, final ValidationStringency va * @param reference is allowed to be null */ public static void assertSamsEqual(final File actualSam, final File expectedSam, final ValidationStringency validationStringency, final File reference) throws IOException { + assertSamsEqual(actualSam.toPath(), expectedSam.toPath(), validationStringency, + (null==reference?null:reference.toPath())); + } + public static void assertSamsEqual(final Path actualSam, final Path expectedSam, final ValidationStringency validationStringency, final Path reference) throws IOException { final String equalStringent = samsEqualStringent(actualSam, expectedSam, validationStringency, reference); - Assert.assertNull(equalStringent, "SAM file " + actualSam.getPath() + " differs from expected output:" + expectedSam.getPath() + " " + equalStringent); + Assert.assertNull(equalStringent, "SAM file " + actualSam.toUri().toString() + " differs from expected output:" + expectedSam.toUri().toString() + " " + equalStringent); } /** @@ -68,6 +75,9 @@ public static void assertSamsEqual(final File actualSam, final File expectedSam, public static void assertSamsEqual(final File actualSam, final File expectedSam, final File reference) throws IOException { assertSamsEqual(actualSam, expectedSam, ValidationStringency.DEFAULT_STRINGENCY, reference); } + public static void assertSamsEqual(final Path actualSam, final Path expectedSam, final Path reference) throws IOException { + assertSamsEqual(actualSam, expectedSam, ValidationStringency.DEFAULT_STRINGENCY, reference); + } /** * causes an exception if the given sam files aren't equal @@ -145,6 +155,19 @@ public static String samsEqualLenient(final File actualSam, final File expectedS * @return null if equal or message string if not equal. */ public static String samsEqualStringent(final File actualSam, final File expectedSam, final ValidationStringency validation, final File reference) throws IOException { + return samsEqualStringent(actualSam.toPath(), expectedSam.toPath(), validation, (null==reference?null:reference.toPath())); + } + + /** + * Compares SAM/BAM files in a stringent way but not by byte identity (allow reorder of attributes) + * Comparing by MD5s is too strict and comparing by SamComparison is too lenient. So we need this method. + * + * This differs from a byte-to-byte comparison: + * - @PG and @CO lines in headers are ignored in the comparison. + * - each read in the actual file are allowed to have a superset of the attributes of the corresponding read in the expected set + * @return null if equal or message string if not equal. + */ + public static String samsEqualStringent(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { if (sameMD5s(actualSam, expectedSam)) { return null; } @@ -168,7 +191,22 @@ private static boolean sameMD5s(final File actualSam, final File expectedSam) th return fileMD5_1.equals(fileMD5_2); } + + private static boolean sameMD5s(final Path actualSam, final Path expectedSam) throws IOException { + final String fileMD5_1 = Utils.calculatePathMD5(actualSam); + final String fileMD5_2 = Utils.calculatePathMD5(expectedSam); + return fileMD5_1.equals(fileMD5_2); + } + private static String compareReads(final File actualSam, final File expectedSam, final ValidationStringency validation, final File reference) throws IOException { + return compareReads( + actualSam.toPath(), + expectedSam.toPath(), + validation, + (null==reference?null:reference.toPath())); + } + + private static String compareReads(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { try(final SamReader reader1 = getReader(actualSam, validation, reference); final SamReader reader2 = getReader(expectedSam, validation, reference)) { final SAMRecordIterator it1 = reader1.iterator(); @@ -189,9 +227,9 @@ private static String compareReads(final File actualSam, final File expectedSam, } } - private static String equalHeadersIgnoreCOandPG(final File actualSam, final File expectedSam, final ValidationStringency validation, final File reference) throws IOException { + private static String equalHeadersIgnoreCOandPG(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { try(final SamReader reader1 = getReader(actualSam, validation, reference); - final SamReader reader2 = getReader(expectedSam, validation, reference)){ + final SamReader reader2 = getReader(expectedSam, validation, reference)){ final SAMFileHeader h1 = reader1.getFileHeader(); final SAMFileHeader h2 = reader2.getFileHeader(); @@ -376,16 +414,24 @@ public static void assertEqualBamFiles( * Validate/assert that the contents are CRAM if the extension is .cram */ public static void assertCRAMContentsIfCRAM(final File putativeCRAMFile) { - if (IOUtils.isCramFile(putativeCRAMFile)) { - assertCRAMContents(putativeCRAMFile); + Path path = (null==putativeCRAMFile?null:putativeCRAMFile.toPath()); + assertCRAMContentsIfCRAM(path); + } + + /** + * Validate/assert that the contents are CRAM if the extension is .cram + */ + public static void assertCRAMContentsIfCRAM(final Path putativeCRAMPath) { + if (IOUtils.isCramFile(putativeCRAMPath)) { + assertCRAMContents(putativeCRAMPath); } } /** * Unconditionally validate/assert that the contents are CRAM */ - public static void assertCRAMContents(final File putativeCRAMFile) { - Assert.assertTrue(ReadUtils.hasCRAMFileContents(putativeCRAMFile), "should have had CRAM contents: " + putativeCRAMFile.getName()); + public static void assertCRAMContents(final Path putativeCRAMPath) { + Assert.assertTrue(ReadUtils.hasCRAMFileContents(putativeCRAMPath), "should have had CRAM contents: " + putativeCRAMPath.toUri().toString()); } private static void sortSam(final File input, final File output, final File reference, final ValidationStringency stringency) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSRIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSRIntegrationTest.java index 29008648eae..a81ebdd77be 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSRIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/bqsr/ApplyBQSRIntegrationTest.java @@ -1,12 +1,18 @@ package org.broadinstitute.hellbender.tools.walkers.bqsr; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; import htsjdk.samtools.SamReaderFactory; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; import org.apache.commons.lang.StringUtils; import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.test.IntegrationTestSpec; import org.broadinstitute.hellbender.utils.test.SamAssertionUtils; import org.testng.Assert; @@ -80,8 +86,18 @@ public Object[][] createABQSRTestData() { return tests.toArray(new Object[][]{}); } + @DataProvider(name = "MiniApplyBQSRTest") + public Object[][] createMiniABQSRTestData() { + List tests = new ArrayList<>(); + + //Note: these outputs were created using GATK3 + tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.bam")}); + + return tests.toArray(new Object[][]{}); + } + @Test(dataProvider = "ApplyBQSRTest") - public void testApplyBQSR(ABQSRTest params) throws IOException { + public void testApplyBQSRFile(ABQSRTest params) throws IOException { File outFile = GATKBaseTest.createTempFile("applyBQSRTest", params.outputExtension); final ArrayList args = new ArrayList<>(); File refFile = null; @@ -90,10 +106,69 @@ public void testApplyBQSR(ABQSRTest params) throws IOException { args.add(new File(params.bam).getAbsolutePath()); args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME); args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath()); - args.add("-O"); args.add(outFile.getAbsolutePath()); + args.add("-O"); + args.add(outFile.getAbsolutePath()); if (params.reference != null) { refFile = new File(params.reference); - args.add("-R"); args.add(refFile.getAbsolutePath()); + args.add("-R"); + args.add(refFile.getAbsolutePath()); + if (params.args != null) { + Stream.of(params.args).forEach(arg -> args.add(arg)); + } + + runCommandLine(args); + + SamAssertionUtils.assertSamsEqual(outFile, new File(params.expectedFile), refFile); + } + } + + @Test(dataProvider = "MiniApplyBQSRTest") + public void testApplyBQSRPath(ABQSRTest params) throws IOException { + try (FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) { + final Path outPath = jimfs.getPath("applyBQSRTest"+params.outputExtension); + + final ArrayList args = new ArrayList<>(); + Path refPath = null; + + args.add("-I"); + args.add(new File(params.bam).getAbsolutePath()); + args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME); + args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath()); + args.add("-O"); args.add(outPath.toUri().toString()); + if (params.reference != null) { + File refFile = new File(params.reference); + args.add("-R"); args.add(refFile.getAbsolutePath()); + refPath = refFile.toPath(); + } + if (params.args != null) { + Stream.of(params.args).forEach(arg -> args.add(arg)); + } + + runCommandLine(args); + + SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath); + } + } + + @Test(dataProvider = "ApplyBQSRTest", groups={"bucket"}) + public void testApplyBQSRCloud(ABQSRTest params) throws IOException { + // getTempFilePath also deletes the file on exit. + final String outString = BucketUtils.getTempFilePath(getGCPTestStaging() + "tmp/testApplyBQSRCloud", params.outputExtension); + final Path outPath = BucketUtils.getPathOnGcs(outString); + final ArrayList args = new ArrayList<>(); + Path refPath = null; + + args.add("-I"); + args.add(new File(params.bam).getAbsolutePath()); + args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME); + args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath()); + args.add("-O"); + args.add(outString); + if (params.reference != null) { + File refFile = new File(params.reference); + args.add("-R"); + args.add(refFile.getAbsolutePath()); + refPath = refFile.toPath(); } if (params.args != null) { Stream.of(params.args).forEach(arg -> args.add(arg)); @@ -101,7 +176,7 @@ public void testApplyBQSR(ABQSRTest params) throws IOException { runCommandLine(args); - SamAssertionUtils.assertSamsEqual(outFile, new File(params.expectedFile), refFile); + SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath); } @Test diff --git a/src/test/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtilsUnitTest.java index fa1da3fb881..828b512924a 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/test/SamAssertionUtilsUnitTest.java @@ -61,12 +61,12 @@ public Object[][] testCRAMContentsFailData() { @Test(dataProvider = "testCRAMContentsSucceed") public void testAssertCRAMContentsSucceed(File putativeCRAMFile) { - SamAssertionUtils.assertCRAMContents(putativeCRAMFile); + SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); } @Test(dataProvider = "testCRAMContentsFail", expectedExceptions=AssertionError.class) public void testAssertCRAMContentsFail(File putativeCRAMFile) { - SamAssertionUtils.assertCRAMContents(putativeCRAMFile); + SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); } @DataProvider(name="testCRAMContentsIfCRAMSucceed")