From 142282a22bc3e785ee7bb577b07c8dbecd0e0b93 Mon Sep 17 00:00:00 2001 From: Takuto Sato Date: Tue, 6 Aug 2024 13:07:46 -0400 Subject: [PATCH] Add piped input tests for RevertSam, MergeBamAlignment (#1974) --- src/main/java/picard/sam/RevertSam.java | 8 +- .../picard/sam/MergeBamAlignmentTest.java | 6 +- src/test/java/picard/sam/PipedDataTest.java | 109 ++++++++++++++---- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/src/main/java/picard/sam/RevertSam.java b/src/main/java/picard/sam/RevertSam.java index ab1ffd70b1..8281571a06 100644 --- a/src/main/java/picard/sam/RevertSam.java +++ b/src/main/java/picard/sam/RevertSam.java @@ -269,7 +269,13 @@ protected int doWork() { } final boolean sanitizing = SANITIZE; - final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).validationStringency(VALIDATION_STRINGENCY).open(SamInputResource.of(INPUT.toPath())); // tsato: confirm this won't break piped input + + // Wrap the INPUT.toPath() inside SamInputResource.of() as a workaround for the Illegal Seek error. + // For details, see: + // https://github.com/broadinstitute/picard/pull/1974 + // https://github.com/samtools/htsjdk/pull/1124 + // https://github.com/samtools/htsjdk/issues/1084 + final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).validationStringency(VALIDATION_STRINGENCY).open(SamInputResource.of(INPUT.toPath())); final SAMFileHeader inHeader = in.getFileHeader(); ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME); diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index 54be4cd116..affbe24c00 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -75,7 +75,7 @@ public class MergeBamAlignmentTest extends CommandLineProgramTest { private static final File DATA_DIR = new File("testdata/picard/sam"); private static final File TEST_DATA_DIR = new File(DATA_DIR, "MergeBamAlignment"); - private static final File unmappedBam = new File(DATA_DIR, "unmapped.sam"); + public static final File unmappedBam = new File(DATA_DIR, "unmapped.sam"); private static final File alignedBam = new File(DATA_DIR, "aligned.sam"); private static final File oneHalfAlignedBam = new File(DATA_DIR, "onehalfaligned.sam"); private static final File otherHalfAlignedBam = new File(DATA_DIR, "otherhalfaligned.sam"); @@ -87,8 +87,8 @@ public class MergeBamAlignmentTest extends CommandLineProgramTest { private static final File secondReadAlignedBam_firstHalf = new File(TEST_DATA_DIR, "firsthalf.read2.trimmed.aligned.sam"); private static final File secondReadAlignedBam_secondHalf = new File(TEST_DATA_DIR, "secondhalf.read2.trimmed.aligned.sam"); private static final File supplementalReadAlignedBam = new File(TEST_DATA_DIR, "aligned.supplement.sam"); - private static final File alignedQuerynameSortedBam = new File(DATA_DIR, "aligned_queryname_sorted.sam"); - private static final File fasta = new File(DATA_DIR, "merger.fasta"); + public static final File alignedQuerynameSortedBam = new File(DATA_DIR, "aligned_queryname_sorted.sam"); + public static final File fasta = new File(DATA_DIR, "merger.fasta"); private static final String bigSequenceName = "chr7"; // The longest sequence in merger.fasta private static final File sequenceDict = new File(DATA_DIR, "merger.dict"); private static final File sequenceDict2 = new File(DATA_DIR, "merger.2.dict"); diff --git a/src/test/java/picard/sam/PipedDataTest.java b/src/test/java/picard/sam/PipedDataTest.java index c3c3cee903..370ac77969 100644 --- a/src/test/java/picard/sam/PipedDataTest.java +++ b/src/test/java/picard/sam/PipedDataTest.java @@ -1,35 +1,98 @@ package picard.sam; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import picard.cmdline.CommandLineProgramTest; + +import java.io.File; +import java.io.IOException; +import java.util.stream.StreamSupport; + +import static picard.sam.MergeBamAlignmentTest.fasta; +import static picard.sam.MergeBamAlignmentTest.unmappedBam; public class PipedDataTest { + private static final String classPath = "\"" + System.getProperty("java.class.path") + "\" "; + private static final String picardCommandlinePreamble = "java -classpath " + classPath + "picard.cmdline.PicardCommandLine "; + private static final String testBam = "testdata/picard/sam/test.bam"; - private String classPath = "\"" + System.getProperty("java.class.path") + "\" "; + /** + * Creates the command line argument to be used for piped (/dev/stdin) input tests. + * + * @param inputSAM the path to the input SAM file. + * @return commandline for ViewSam. + */ + private String getViewSamPicardCommand(final String inputSAM){ + return picardCommandlinePreamble + + "ViewSam " + + "I=" + inputSAM + " " + + "ALIGNMENT_STATUS=All " + + "PF_STATUS=All "; + } - @Test - public void testSortSam() { - String[] command = { + @DataProvider(name = "pipedInputTestData") + public Object[][] getPipedInputTestData() throws IOException { + // Note the trailing space in each string block. + // TODO: port ArgumentBuilder from GATK so this would not be necessary. + final File sortSamOutput = File.createTempFile("SortSam_pipe_test", "bam"); + sortSamOutput.deleteOnExit(); + final String[] sortSamCommand = { "/bin/bash", "-c", - "java -classpath " + - classPath + - "picard.cmdline.PicardCommandLine " + - "ViewSam " + - "I=testdata/picard/sam/test.bam " + - "ALIGNMENT_STATUS=All " + - "PF_STATUS=All " + - "| " + - "java -classpath " + - classPath + - "picard.cmdline.PicardCommandLine " + - "SortSam " + - "I=/dev/stdin " + - "O=/dev/null " + - "SORT_ORDER=queryname" + getViewSamPicardCommand(testBam) + + "| " + + picardCommandlinePreamble + + "SortSam " + + "SORT_ORDER=queryname " + + "I=/dev/stdin " + + "O=" + sortSamOutput.getAbsolutePath() }; + final File revertSamOutput = File.createTempFile("RevertSam_pipe_test", "bam"); + revertSamOutput.deleteOnExit(); + final String[] revertSamCommand = { + "/bin/bash", + "-c", + getViewSamPicardCommand(testBam) + + "| " + + picardCommandlinePreamble + + "RevertSam " + + "I=/dev/stdin " + + "O=" + revertSamOutput.getAbsolutePath() + }; + + + // Only test the case where the "alignedBam" argument is /dev/stdin, which is the standard use case (bwa -> MergeBamAlignment). + // + // Note that the input aligned bam must be query-name sorted (as it would be if it's piped from bwa). + // MergeBamAlignment fails if the coordinate sorted aligned bam is given, after trying to query-name sort dynamically and failing to find the reference header items, + // seemingly due to the fact that the input is /dev/stdin. + final File mergeBamAlignmentOutput = File.createTempFile("MergeBamAlignment_pipe_test", "bam"); + mergeBamAlignmentOutput.deleteOnExit(); + final String[] mergeBamAlignmentCommand = { + "/bin/bash", + "-c", + getViewSamPicardCommand(MergeBamAlignmentTest.alignedQuerynameSortedBam.getAbsolutePath()) + // here we have to use "+", not ",". + "| " + + picardCommandlinePreamble + + "MergeBamAlignment " + + "ALIGNED_BAM=/dev/stdin " + + "UNMAPPED=" + unmappedBam + " " + + "REFERENCE_SEQUENCE=" + fasta + " " + + "OUTPUT=" + mergeBamAlignmentOutput.getAbsolutePath() + }; + + return new Object[][]{ + {sortSamCommand, sortSamOutput}, + {revertSamCommand, revertSamOutput}, + {mergeBamAlignmentCommand, mergeBamAlignmentOutput} + }; + } + + @Test(dataProvider = "pipedInputTestData") + public void testPipedInput(final String[] command, final File tmpOutput) { try { ProcessBuilder processBuilder = new ProcessBuilder(command); processBuilder.inheritIO(); @@ -37,8 +100,12 @@ public void testSortSam() { Process process = processBuilder.start(); Assert.assertEquals(process.waitFor(), 0); + final SamReader reader = SamReaderFactory.makeDefault().open(tmpOutput); + final long numOutputReads = StreamSupport.stream(reader.spliterator(), false).count(); + Assert.assertTrue(numOutputReads > 0); + Assert.assertTrue(reader.getFileHeader().getReadGroups().size() > 0); } catch (Exception e) { - Assert.fail("Failed to pipe data from htsjdk to picard", e); + Assert.fail("Failed to pipe data to picard. The error was: ", e); } } }