-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NIO output support for ApplyBQSR #4424
Changes from 2 commits
2536af7
b7c0c00
9d0403b
ed3632a
077e85f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,9 @@ | |
import com.google.common.primitives.Ints; | ||
import htsjdk.samtools.SAMFileHeader; | ||
import htsjdk.tribble.util.ParsingUtils; | ||
import java.io.FileNotFoundException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import org.apache.commons.io.FileUtils; | ||
import org.apache.commons.lang3.ArrayUtils; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
@@ -531,6 +534,17 @@ public static String calcMD5(final byte[] bytes) { | |
public static String calculateFileMD5( final File file ) throws IOException{ | ||
return Utils.calcMD5(FileUtils.readFileToByteArray(file)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems like if we can we should make all the old methods delegate to the new ones There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, done. |
||
} | ||
public static String calculatePathMD5(final Path path) throws IOException{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add javadoc for the new overload of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, can you make sure that these new method overloads are covered by unit tests? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added Javadoc and made sure that the first calls the second. Since the first is tested, that also covers the second automatically. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a warning to the javadoc for this method that it slurps the entire file into memory, and should not be used for large files. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done! |
||
// This doesn't have as nice error messages as FileUtils, but it's close. | ||
String fname = path.toUri().toString(); | ||
if (!Files.exists(path)) { | ||
throw new FileNotFoundException("File '" + fname + "' does not exist"); | ||
} | ||
if (Files.isDirectory(path)) { | ||
throw new IOException("File '" + fname + "' exists but is a directory"); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this check Files.isRegularFile() as well? It seems wrong to try to take the md5 of a pipe or something like that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, done. |
||
return Utils.calcMD5(Files.readAllBytes(path)); | ||
} | ||
|
||
/** | ||
* Checks that an Object {@code object} is not null and returns the same object or throws an {@link IllegalArgumentException} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,10 +14,8 @@ | |
import htsjdk.samtools.SAMUtils; | ||
import htsjdk.samtools.SamStreams; | ||
import htsjdk.samtools.cram.build.CramIO; | ||
import java.io.BufferedInputStream; | ||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.*; | ||
import java.nio.file.Files; | ||
import java.nio.file.OpenOption; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
|
@@ -1131,19 +1129,31 @@ public static SAMFileWriter createCommonSAMWriterFromFactory( | |
* Validate that a file has CRAM contents by checking that it has a valid CRAM file header | ||
* (no matter what the extension). | ||
* | ||
* @param putativeCRAMFile File to check. | ||
* @param putativeCRAMPath File to check. | ||
* @return true if the file has a valid CRAM file header, otherwise false | ||
*/ | ||
public static boolean hasCRAMFileContents(final File putativeCRAMFile) { | ||
try (final FileInputStream fileStream = new FileInputStream(putativeCRAMFile); | ||
final BufferedInputStream bis = new BufferedInputStream(fileStream)) { | ||
return SamStreams.isCRAMFile(bis); | ||
public static boolean hasCRAMFileContents(final Path putativeCRAMPath) { | ||
try (final InputStream fileStream = Files.newInputStream(putativeCRAMPath)) { | ||
try (final BufferedInputStream bis = new BufferedInputStream(fileStream)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's better to have 2 resources declared in the same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't compile that way, complains about an IOException. Suggestions welcome, though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We tried it, and it does actually work as a single |
||
return SamStreams.isCRAMFile(bis); | ||
} | ||
} | ||
catch (IOException e) { | ||
throw new UserException.CouldNotReadInputFile(e.getMessage()); | ||
} | ||
} | ||
|
||
/** | ||
* Validate that a file has CRAM contents by checking that it has a valid CRAM file header | ||
* (no matter what the extension). | ||
* | ||
* @param putativeCRAMFile File to check. | ||
* @return true if the file has a valid CRAM file header, otherwise false | ||
*/ | ||
public static boolean hasCRAMFileContents(final File putativeCRAMFile) { | ||
return hasCRAMFileContents(putativeCRAMFile.toPath()); | ||
} | ||
|
||
public static boolean isNonPrimary(GATKRead read) { | ||
return read.isSecondaryAlignment() || read.isSupplementaryAlignment() || read.isUnmapped(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
import java.io.File; | ||
import java.io.IOException; | ||
import java.io.PrintWriter; | ||
import java.nio.file.Path; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.LinkedHashMap; | ||
|
@@ -36,7 +37,9 @@ public final class SamAssertionUtils { | |
private static SamReader getReader(final File sam, final ValidationStringency validationStringency, final File reference) { | ||
return SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference).open(sam); | ||
} | ||
|
||
private static SamReader getReader(final Path sam, final ValidationStringency validationStringency, final Path reference) { | ||
return SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(reference).open(sam); | ||
} | ||
/** | ||
* causes an exception if the given sam files aren't equal | ||
* @param actualSam the actual file | ||
|
@@ -45,8 +48,12 @@ private static SamReader getReader(final File sam, final ValidationStringency va | |
* @param reference is allowed to be null | ||
*/ | ||
public static void assertSamsEqual(final File actualSam, final File expectedSam, final ValidationStringency validationStringency, final File reference) throws IOException { | ||
assertSamsEqual(actualSam.toPath(), expectedSam.toPath(), validationStringency, | ||
(null==reference?null:reference.toPath())); | ||
} | ||
public static void assertSamsEqual(final Path actualSam, final Path expectedSam, final ValidationStringency validationStringency, final Path reference) throws IOException { | ||
final String equalStringent = samsEqualStringent(actualSam, expectedSam, validationStringency, reference); | ||
Assert.assertNull(equalStringent, "SAM file " + actualSam.getPath() + " differs from expected output:" + expectedSam.getPath() + " " + equalStringent); | ||
Assert.assertNull(equalStringent, "SAM file " + actualSam.toUri().toString() + " differs from expected output:" + expectedSam.toUri().toString() + " " + equalStringent); | ||
} | ||
|
||
/** | ||
|
@@ -68,6 +75,9 @@ public static void assertSamsEqual(final File actualSam, final File expectedSam, | |
public static void assertSamsEqual(final File actualSam, final File expectedSam, final File reference) throws IOException { | ||
assertSamsEqual(actualSam, expectedSam, ValidationStringency.DEFAULT_STRINGENCY, reference); | ||
} | ||
public static void assertSamsEqual(final Path actualSam, final Path expectedSam, final Path reference) throws IOException { | ||
assertSamsEqual(actualSam, expectedSam, ValidationStringency.DEFAULT_STRINGENCY, reference); | ||
} | ||
|
||
/** | ||
* causes an exception if the given sam files aren't equal | ||
|
@@ -161,14 +171,46 @@ public static String samsEqualStringent(final File actualSam, final File expecte | |
return compareReads(actualSam, expectedSam, validation, reference); | ||
} | ||
|
||
public static String samsEqualStringent(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make the file overload of this method call into the Path version? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also add javadoc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Certainly! Done. |
||
if (sameMD5s(actualSam, expectedSam)) { | ||
return null; | ||
} | ||
|
||
// verify that CRAM files have CRAM contents | ||
assertCRAMContentsIfCRAM(actualSam); | ||
assertCRAMContentsIfCRAM(expectedSam); | ||
|
||
String msg = equalHeadersIgnoreCOandPG(actualSam, expectedSam, validation, reference); | ||
if (msg != null) { return msg; } | ||
|
||
//At this point we know that the files are not byte-wise identical, but are equal according to SamComparison and their headers are equal | ||
//So we iterate over reads and compare them one by one. | ||
return compareReads(actualSam, expectedSam, validation, reference); | ||
} | ||
|
||
|
||
private static boolean sameMD5s(final File actualSam, final File expectedSam) throws IOException { | ||
final String fileMD5_1 = Utils.calculateFileMD5(actualSam); | ||
final String fileMD5_2 = Utils.calculateFileMD5(expectedSam); | ||
return fileMD5_1.equals(fileMD5_2); | ||
} | ||
|
||
|
||
private static boolean sameMD5s(final Path actualSam, final Path expectedSam) throws IOException { | ||
final String fileMD5_1 = Utils.calculatePathMD5(actualSam); | ||
final String fileMD5_2 = Utils.calculatePathMD5(expectedSam); | ||
return fileMD5_1.equals(fileMD5_2); | ||
} | ||
|
||
private static String compareReads(final File actualSam, final File expectedSam, final ValidationStringency validation, final File reference) throws IOException { | ||
return compareReads( | ||
actualSam.toPath(), | ||
expectedSam.toPath(), | ||
validation, | ||
(null==reference?null:reference.toPath())); | ||
} | ||
|
||
private static String compareReads(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { | ||
try(final SamReader reader1 = getReader(actualSam, validation, reference); | ||
final SamReader reader2 = getReader(expectedSam, validation, reference)) { | ||
final SAMRecordIterator it1 = reader1.iterator(); | ||
|
@@ -215,6 +257,32 @@ private static String equalHeadersIgnoreCOandPG(final File actualSam, final File | |
return msg; | ||
} | ||
} | ||
private static String equalHeadersIgnoreCOandPG(final Path actualSam, final Path expectedSam, final ValidationStringency validation, final Path reference) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make the file overload call into this one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh actually I meant to delete it; it's private and has no callers. |
||
try(final SamReader reader1 = getReader(actualSam, validation, reference); | ||
final SamReader reader2 = getReader(expectedSam, validation, reference)){ | ||
|
||
final SAMFileHeader h1 = reader1.getFileHeader(); | ||
final SAMFileHeader h2 = reader2.getFileHeader(); | ||
String msg; | ||
|
||
//Note: we allow the versions to differ | ||
|
||
msg = compareValues(h1.getCreator(), h2.getCreator(), "File creator"); | ||
if (msg != null) { return msg; } | ||
|
||
msg = compareValues(h1.getAttribute("SO"), h2.getAttribute("SO"), "Sort order"); | ||
if (msg != null) { return msg; } | ||
|
||
if (! Objects.equals(h1.getSequenceDictionary(), h2.getSequenceDictionary())){ | ||
return "Different Sequence dictionaries"; | ||
} | ||
|
||
msg = compareReadGroups(h1, h2); | ||
if (msg != null) { return msg; } | ||
|
||
return msg; | ||
} | ||
} | ||
|
||
private static String compareReadGroups(final SAMFileHeader h1, final SAMFileHeader h2) { | ||
final List<SAMReadGroupRecord> l1 = h1.getReadGroups(); | ||
|
@@ -377,15 +445,20 @@ public static void assertEqualBamFiles( | |
*/ | ||
public static void assertCRAMContentsIfCRAM(final File putativeCRAMFile) { | ||
if (IOUtils.isCramFile(putativeCRAMFile)) { | ||
assertCRAMContents(putativeCRAMFile); | ||
assertCRAMContents(putativeCRAMFile.toPath()); | ||
} | ||
} | ||
public static void assertCRAMContentsIfCRAM(final Path putativeCRAMPath) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. javadoc for this method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done; I also changed the File version to more cleanly call the Path version. |
||
if (IOUtils.isCramFile(putativeCRAMPath)) { | ||
assertCRAMContents(putativeCRAMPath); | ||
} | ||
} | ||
|
||
/** | ||
* Unconditionally validate/assert that the contents are CRAM | ||
*/ | ||
public static void assertCRAMContents(final File putativeCRAMFile) { | ||
Assert.assertTrue(ReadUtils.hasCRAMFileContents(putativeCRAMFile), "should have had CRAM contents: " + putativeCRAMFile.getName()); | ||
public static void assertCRAMContents(final Path putativeCRAMPath) { | ||
Assert.assertTrue(ReadUtils.hasCRAMFileContents(putativeCRAMPath), "should have had CRAM contents: " + putativeCRAMPath.toUri().toString()); | ||
} | ||
|
||
private static void sortSam(final File input, final File output, final File reference, final ValidationStringency stringency) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.bqsr; | ||
|
||
import com.google.common.jimfs.Configuration; | ||
import com.google.common.jimfs.Jimfs; | ||
import htsjdk.samtools.SamReaderFactory; | ||
import java.nio.file.FileSystem; | ||
import java.nio.file.Path; | ||
import org.apache.commons.lang.StringUtils; | ||
import org.broadinstitute.barclay.argparser.CommandLineException; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
|
@@ -82,26 +86,31 @@ public Object[][] createABQSRTestData() { | |
|
||
@Test(dataProvider = "ApplyBQSRTest") | ||
public void testApplyBQSR(ABQSRTest params) throws IOException { | ||
File outFile = GATKBaseTest.createTempFile("applyBQSRTest", params.outputExtension); | ||
final ArrayList<String> args = new ArrayList<>(); | ||
File refFile = null; | ||
|
||
args.add("-I"); | ||
args.add(new File(params.bam).getAbsolutePath()); | ||
args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME); | ||
args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath()); | ||
args.add("-O"); args.add(outFile.getAbsolutePath()); | ||
if (params.reference != null) { | ||
refFile = new File(params.reference); | ||
args.add("-R"); args.add(refFile.getAbsolutePath()); | ||
try (FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you keep the existing test code as-is, and make the Jimfs-based tests a separate test method? It would also be good to have one test case that writes to a live GCS bucket (marked as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re-instated the old (File-based) test. Remains to add a GCS test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. GCS test added. For now it runs all the cases, but since the other tests already establish that ApplyBQSR does the right thing and we just want to make sure it can write to GCS, it'd make sense to only run a subset of the cases on the cloud. If you let me know which ones you want, I can cull the rest. |
||
final Path outPath = jimfs.getPath("applyBQSRTest"+params.outputExtension); | ||
System.out.println("apply path: " + outPath.toUri().toString()); | ||
|
||
final ArrayList<String> args = new ArrayList<>(); | ||
Path refPath = null; | ||
|
||
args.add("-I"); | ||
args.add(new File(params.bam).getAbsolutePath()); | ||
args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME); | ||
args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath()); | ||
args.add("-O"); args.add(outPath.toUri().toString()); | ||
if (params.reference != null) { | ||
File refFile = new File(params.reference); | ||
args.add("-R"); args.add(refFile.getAbsolutePath()); | ||
refPath = refFile.toPath(); | ||
} | ||
if (params.args != null) { | ||
Stream.of(params.args).forEach(arg -> args.add(arg)); | ||
} | ||
|
||
runCommandLine(args); | ||
|
||
SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath); | ||
} | ||
if (params.args != null) { | ||
Stream.of(params.args).forEach(arg -> args.add(arg)); | ||
} | ||
|
||
runCommandLine(args); | ||
|
||
SamAssertionUtils.assertSamsEqual(outFile, new File(params.expectedFile), refFile); | ||
} | ||
|
||
@Test | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,12 +61,12 @@ public Object[][] testCRAMContentsFailData() { | |
|
||
@Test(dataProvider = "testCRAMContentsSucceed") | ||
public void testAssertCRAMContentsSucceed(File putativeCRAMFile) { | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile); | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); | ||
} | ||
|
||
@Test(dataProvider = "testCRAMContentsFail", expectedExceptions=AssertionError.class) | ||
public void testAssertCRAMContentsFail(File putativeCRAMFile) { | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile); | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Test the file-based overloads as well There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's no file-based overload of |
||
} | ||
|
||
@DataProvider(name="testCRAMContentsIfCRAMSucceed") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a warning to the javadoc for this method that it slurps the entire file into memory, and should not be used for large files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!