diff --git a/src/main/java/picard/analysis/CollectMultipleMetrics.java b/src/main/java/picard/analysis/CollectMultipleMetrics.java index 81a47dd7b0..4daa07d490 100644 --- a/src/main/java/picard/analysis/CollectMultipleMetrics.java +++ b/src/main/java/picard/analysis/CollectMultipleMetrics.java @@ -63,7 +63,8 @@ public class CollectMultipleMetrics extends CommandLineProgram { static final String USAGE_DETAILS ="This 'meta-metrics' tool runs one or more of the metrics collection modules at the same" + " time to cut down on the time spent reading in data from input files. Available modules include " + "CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, " + - "CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, CollectSequencingArtifactMetrics, and CollectQualityYieldMetrics. " + + "CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, CollectSequencingArtifactMetrics, " + + "CollectQualityYieldMetrics and CollectRrbsMetrics. " + "The tool produces outputs of '.pdf' and '.txt' files for each module, except for the " + "CollectAlignmentSummaryMetrics module, which outputs only a '.txt' file. Output files are named by specifying a base name " + "(without any file extensions).

" + @@ -103,6 +104,29 @@ SinglePassSamProgram makeInstance(final String outbase, final String outext, fin } public static enum Program implements ProgramInterface { + CollectRrbsMetrics { + @Override + public SinglePassSamProgram makeInstance(final String outbase, final String outext, final File input, final File reference, final Set metricAccumulationLevel, final File dbSnp, final File intervals) { + final CollectRrbsMetrics program = new CollectRrbsMetrics(); + program.OUTPUT = new File(outbase + ".rrbs.detail_metrics" + outext); + program.SUMMARY_OUTPUT = new File(outbase + ".rrbs.summary_metrics" + outext); + program.CHART_OUTPUT = new File(outbase + ".rrbs.pdf"); + program.INPUT = input; + program.METRIC_ACCUMULATION_LEVEL = metricAccumulationLevel; + program.ASSUME_SORTED = false; + program.setReferenceSequence(reference); + + return program; + } + @Override + public boolean needsReferenceSequence() { + return true; + } + @Override + public boolean supportsMetricAccumulationLevel() { + return true; + } + }, CollectAlignmentSummaryMetrics { @Override public boolean needsReferenceSequence() { diff --git a/src/main/java/picard/analysis/CollectRrbsMetrics.java b/src/main/java/picard/analysis/CollectRrbsMetrics.java index 61b27e76e1..83cd4cb56c 100644 --- a/src/main/java/picard/analysis/CollectRrbsMetrics.java +++ b/src/main/java/picard/analysis/CollectRrbsMetrics.java @@ -27,17 +27,11 @@ import htsjdk.samtools.*; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; -import htsjdk.samtools.reference.ReferenceSequenceFileWalker; -import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.ProgressLogger; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; -import 
picard.PicardException; -import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.argumentcollections.ReferenceArgumentCollection; import picard.cmdline.programgroups.Metrics; @@ -62,7 +56,7 @@ programGroup = Metrics.class ) @DocumentedFeature -public class CollectRrbsMetrics extends CommandLineProgram { +public class CollectRrbsMetrics extends SinglePassSamProgram { static final String USAGE_SUMMARY = "Collects metrics from reduced representation bisulfite sequencing (Rrbs) data. "; static final String USAGE_DETAILS = "

This tool uses reduced representation bisulfite sequencing (Rrbs) data to determine cytosine " + "methylation status across all reads of a genomic DNA sequence. For a primer on bisulfite sequencing and cytosine methylation, " + @@ -82,7 +76,18 @@ public class CollectRrbsMetrics extends CommandLineProgram { "The detailed metrics table includes the coordinates of all of the CpG sites for the experiment as well as the conversion rates " + "observed for each site.

" + - "

Usage example:

" + + "

It is possible to launch CollectRrbsMetrics tool by setting all output files or by indicating a base name for metrics files only. See examples:

" + + + "

Usage example 1:

" + + "
" +
+            "java -jar picard.jar CollectRrbsMetrics \\
" + + " R=reference_sequence.fasta \\
" + + " I=input.bam \\
" + + " O=detail_output.txt \\
" + + " CHART=chart_output.pdf \\
" + + " S=summary_output.txt" + + "
" + + "

Usage example 2:

" + "
" +
             "java -jar picard.jar CollectRrbsMetrics \\
" + " R=reference_sequence.fasta \\
" + @@ -95,13 +100,11 @@ public class CollectRrbsMetrics extends CommandLineProgram { " for a complete description of both the detail and summary metrics produced by this tool.

" + "
"; -// Path to R file for plotting purposes + // Path to R file for plotting purposes -private static final String R_SCRIPT = "picard/analysis/rrbsQc.R"; + private static final String R_SCRIPT = "picard/analysis/rrbsQc.R"; - @Argument(doc = "The BAM or SAM file containing aligned reads. Must be coordinate sorted", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME) - public File INPUT; - @Argument(doc = "Base name for output files", shortName = StandardOptionDefinitions.METRICS_FILE_SHORT_NAME) + @Argument(doc = "Base name for output files", shortName = StandardOptionDefinitions.METRICS_FILE_SHORT_NAME, mutex = {"OUTPUT", "CHART_OUTPUT", "SUMMARY_OUTPUT"}) public String METRICS_FILE_PREFIX; @Argument(doc = "Minimum read length") public int MINIMUM_READ_LENGTH = 5; @@ -112,18 +115,19 @@ public class CollectRrbsMetrics extends CommandLineProgram { @Argument(doc = "Maximum percentage of mismatches in a read for it to be considered, with a range of 0-1") public double MAX_MISMATCH_RATE = 0.1; @Argument(doc = "Set of sequence names to consider, if not specified all sequences will be used", optional = true) - public Set SEQUENCE_NAMES = new HashSet(); - @Argument(shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME, - doc = "If true, assume that the input file is coordinate sorted even if the header says otherwise.") - public boolean ASSUME_SORTED = false; + public Set SEQUENCE_NAMES = new HashSet<>(); @Argument(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics. 
") public Set METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); + @Argument(shortName = "CHART", doc = "The PDF file to render the chart to.", mutex = {"METRICS_FILE_PREFIX"}) + public File CHART_OUTPUT; + @Argument(shortName = "S", doc = "The text file to write summary metrics to.", mutex = {"METRICS_FILE_PREFIX"}) + public File SUMMARY_OUTPUT; public static final String DETAIL_FILE_EXTENSION = "rrbs_detail_metrics"; public static final String SUMMARY_FILE_EXTENSION = "rrbs_summary_metrics"; public static final String PDF_FILE_EXTENSION = "rrbs_qc.pdf"; - private static final Log log = Log.getInstance(CollectRrbsMetrics.class); + private RrbsMetricsCollector metricsCollector; // return a custom argument collection since this tool uses a (required) argument name // of "REFERENCE", not "REFERENCE_SEQUENCE" @@ -147,33 +151,43 @@ public static void main(final String[] args) { } @Override - protected int doWork() { - if (!METRICS_FILE_PREFIX.endsWith(".")) { - METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + "."; - } - final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION); - final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION); - final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION); - assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT); - - final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT); - if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - throw new PicardException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted"); + protected void setup(final SAMFileHeader header, final File samFile) { + if (METRICS_FILE_PREFIX != null) { + if (!METRICS_FILE_PREFIX.endsWith(".")) { + METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + "."; + } + OUTPUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION); + SUMMARY_OUTPUT = new File(METRICS_FILE_PREFIX + 
SUMMARY_FILE_EXTENSION); + CHART_OUTPUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION); } + IOUtil.assertFileIsWritable(OUTPUT); + IOUtil.assertFileIsWritable(SUMMARY_OUTPUT); + IOUtil.assertFileIsWritable(CHART_OUTPUT); + metricsCollector = new RrbsMetricsCollector( + METRIC_ACCUMULATION_LEVEL, + header.getReadGroups(), + C_QUALITY_THRESHOLD, + NEXT_BASE_QUALITY_THRESHOLD, + MINIMUM_READ_LENGTH, + MAX_MISMATCH_RATE + ); + } - final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); - final ProgressLogger progressLogger = new ProgressLogger(log); + @Override + protected void acceptRead(final SAMRecord samRecord, final ReferenceSequence ref) { + if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) { + metricsCollector.acceptRecord(samRecord, ref); + } + } - final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), - C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE); + private boolean isSequenceFiltered(final String sequenceName) { + return SEQUENCE_NAMES != null + && !SEQUENCE_NAMES.isEmpty() + && !SEQUENCE_NAMES.contains(sequenceName); + } - for (final SAMRecord samRecord : samReader) { - progressLogger.record(samRecord); - if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) { - final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex()); - metricsCollector.acceptRecord(samRecord, referenceSequence); - } - } + @Override + protected void finish() { metricsCollector.finish(); final MetricsFile> rrbsMetrics = getMetricsFile(); metricsCollector.addAllLevelsToFile(rrbsMetrics); @@ -182,34 +196,23 @@ protected int doWork() { // we get it out split it apart to the two separate MetricsFiles and write them to file final MetricsFile summaryFile = getMetricsFile(); final MetricsFile detailsFile = 
getMetricsFile(); - for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) { + rrbsMetrics.getMetrics().forEach(rrbsMetric -> { summaryFile.addMetric(rrbsMetric.getSummaryMetrics()); - for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) { - detailsFile.addMetric(detailMetric); - } - } - summaryFile.write(SUMMARY_OUT); - detailsFile.write(DETAILS_OUT); - RExecutor.executeFromClasspath(R_SCRIPT, DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath()); - CloserUtil.close(samReader); - return 0; - } + rrbsMetric.getDetailMetrics().forEach(detailsFile::addMetric); + }); - private boolean isSequenceFiltered(final String sequenceName) { - return (SEQUENCE_NAMES != null) && (!SEQUENCE_NAMES.isEmpty()) && (!SEQUENCE_NAMES.contains(sequenceName)); - } + summaryFile.write(SUMMARY_OUTPUT); + detailsFile.write(OUTPUT); - private void assertIoFiles(final File summaryFile, final File detailsFile, final File plotsFile) { - IOUtil.assertFileIsReadable(INPUT); - IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); - IOUtil.assertFileIsWritable(summaryFile); - IOUtil.assertFileIsWritable(detailsFile); - IOUtil.assertFileIsWritable(plotsFile); + RExecutor.executeFromClasspath(R_SCRIPT, + OUTPUT.getAbsolutePath(), + SUMMARY_OUTPUT.getAbsolutePath(), + CHART_OUTPUT.getAbsolutePath()); } @Override protected String[] customCommandLineValidation() { - final List errorMsgs = new ArrayList(); + final List errorMsgs = new ArrayList<>(); if (MAX_MISMATCH_RATE < 0 || MAX_MISMATCH_RATE > 1) { errorMsgs.add("MAX_MISMATCH_RATE must be in the range of 0-1"); } diff --git a/src/main/java/picard/analysis/SinglePassSamProgram.java b/src/main/java/picard/analysis/SinglePassSamProgram.java index bb93ed9944..2cda91661a 100644 --- a/src/main/java/picard/analysis/SinglePassSamProgram.java +++ b/src/main/java/picard/analysis/SinglePassSamProgram.java @@ -56,7 +56,7 @@ public abstract class SinglePassSamProgram extends CommandLineProgram { 
@Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM or BAM file.") public File INPUT; - @Argument(shortName = "O", doc = "File to write the output to.") + @Argument(shortName = "O", doc = "File to write the output to.", optional = true) public File OUTPUT; @Argument(doc = "If true (default), then the sort order in the header file will be ignored.", diff --git a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java index 9406f034a4..7855c02660 100644 --- a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java +++ b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java @@ -13,6 +13,7 @@ import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; +import picard.metrics.MultilevelMetrics; import picard.sam.SortSam; import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; @@ -33,11 +34,63 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest { private static final File TEST_DATA_DIR = new File("testdata/picard/sam"); + private static final File TEST_DATA_DIR_RRBS = new File("testdata/picard/metrics"); public String getCommandLineProgramName() { return CollectMultipleMetrics.class.getSimpleName(); } + @Test + public void testCollectRrbsMetrics() throws IOException { + final File input = new File(TEST_DATA_DIR_RRBS, "chrMReads.sam"); + final File reference = new File(TEST_DATA_DIR_RRBS, "chrM.reference.fasta"); + final File outfile = File.createTempFile("crmt.", ".rrbs_summary_metrics"); + outfile.deleteOnExit(); + + /* Suffixes must mirror the ".rrbs.*" names built by CollectMultipleMetrics.Program.CollectRrbsMetrics.makeInstance(). */ + final File summary = new File(outfile + ".rrbs.summary_metrics"); + final File detail = new File(outfile + ".rrbs.detail_metrics"); + final File pdf = new File(outfile + ".rrbs.pdf"); + summary.deleteOnExit(); + detail.deleteOnExit(); + pdf.deleteOnExit(); + + final String[] args = new String[] { + "INPUT=" + 
input.getAbsolutePath(), + "OUTPUT=" + outfile.getAbsolutePath(), + "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(), + "PROGRAM=" + null, + "PROGRAM=" + CollectMultipleMetrics.Program.CollectRrbsMetrics.name()}; + + Assert.assertEquals(runPicardCommandLine(args), 0); + + final RrbsSummaryMetrics metrics = getMultilevelMetrics(summary); + Assert.assertEquals(metrics.READS_ALIGNED.intValue(), 5); + Assert.assertEquals(metrics.NON_CPG_BASES.intValue(), 15); + Assert.assertEquals(metrics.NON_CPG_CONVERTED_BASES.intValue(), 11); + Assert.assertEquals(metrics.PCT_NON_CPG_BASES_CONVERTED, 0.733333); + Assert.assertEquals(metrics.CPG_BASES_SEEN.intValue(), 5); + Assert.assertEquals(metrics.CPG_BASES_CONVERTED.intValue(), 1); + Assert.assertEquals(metrics.PCT_CPG_BASES_CONVERTED, 0.2); + Assert.assertEquals(metrics.MEAN_CPG_COVERAGE, 1.666667); + Assert.assertEquals(metrics.MEDIAN_CPG_COVERAGE.intValue(), 2); + Assert.assertEquals(metrics.READS_WITH_NO_CPG.intValue(), 1); + Assert.assertEquals(metrics.READS_IGNORED_SHORT.intValue(), 1); + Assert.assertEquals(metrics.READS_IGNORED_MISMATCHES.intValue(), 1); + + final RrbsCpgDetailMetrics metricsCpg = getMultilevelMetrics(detail); + Assert.assertEquals(metricsCpg.SEQUENCE_NAME, "chrM"); + Assert.assertEquals(metricsCpg.POSITION.intValue(), 60); + Assert.assertEquals(metricsCpg.TOTAL_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.CONVERTED_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.PCT_CONVERTED.intValue(), 1); + } + + private T getMultilevelMetrics(final File file) throws FileNotFoundException { + final MetricsFile retVal = new MetricsFile(); + retVal.read(new FileReader(file)); + return retVal.getMetrics().get(0); + } @Test public void testAlignmentSummaryViaMultipleMetrics() throws IOException { diff --git a/src/test/java/picard/analysis/CollectRrbsMetricsTest.java b/src/test/java/picard/analysis/CollectRrbsMetricsTest.java new file mode 100644 index 0000000000..d932cb8ffa --- /dev/null +++ 
b/src/test/java/picard/analysis/CollectRrbsMetricsTest.java @@ -0,0 +1,182 @@ +package picard.analysis; + +import htsjdk.samtools.metrics.MetricsFile; +import htsjdk.samtools.util.IOUtil; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import picard.cmdline.CommandLineProgramTest; +import picard.metrics.MultilevelMetrics; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class CollectRrbsMetricsTest extends CommandLineProgramTest { + private static final String CHR_M_SAM = "testdata/picard/metrics/chrMReads.sam"; + private static final String CHR_M_REFERENCE = "testdata/picard/metrics/chrM.reference.fasta"; + private static final String CHR_M_SAM_TEST_UNMAPPED = "testdata/picard/sam/collect_rrbs_metrics_test_unmapped.sam"; + private static final String CHR_M_REFERENCE_TEST_UNMAPPED = "testdata/picard/sam/summary_alignment_stats_test.fasta"; + + private File pdf; + private File summaryOutput; + private File detailOutput; + private File tempDir; + private String prefix; + + @Override + public String getCommandLineProgramName() { + return CollectRrbsMetrics.class.getSimpleName(); + } + + @BeforeClass + private void setUp() throws Exception { + pdf = File.createTempFile("crmt.", ".pdf"); + pdf.deleteOnExit(); + summaryOutput = File.createTempFile("crmt.", ".rrbs_summary_metrics"); + summaryOutput.deleteOnExit(); + detailOutput = File.createTempFile("crmt.", "detail.sam"); + detailOutput.deleteOnExit(); + tempDir = Files.createTempDirectory("crmt.").toFile(); + prefix = Paths.get(tempDir.toString(), "crmt.").toString(); + } + + @AfterClass + public void clearTempDir() throws IOException { + IOUtil.deleteDirectoryTree(tempDir); + } + + 
@Test + public void chrMReads() throws Exception { + Assert.assertEquals( + runPicardCommandLine( + makeArgList(CHR_M_SAM, null, detailOutput.getAbsolutePath(), pdf.getAbsolutePath(), summaryOutput.getAbsolutePath(), CHR_M_REFERENCE) + ), + 0 + ); + + final RrbsSummaryMetrics metrics = getMultilevelMetrics(summaryOutput); + Assert.assertEquals(metrics.READS_ALIGNED.intValue(), 5); + Assert.assertEquals(metrics.NON_CPG_BASES.intValue(), 15); + Assert.assertEquals(metrics.NON_CPG_CONVERTED_BASES.intValue(), 11); + Assert.assertEquals(metrics.PCT_NON_CPG_BASES_CONVERTED, 0.733333); + Assert.assertEquals(metrics.CPG_BASES_SEEN.intValue(), 5); + Assert.assertEquals(metrics.CPG_BASES_CONVERTED.intValue(), 1); + Assert.assertEquals(metrics.PCT_CPG_BASES_CONVERTED, 0.2); + Assert.assertEquals(metrics.MEAN_CPG_COVERAGE, 1.666667); + Assert.assertEquals(metrics.MEDIAN_CPG_COVERAGE.intValue(), 2); + Assert.assertEquals(metrics.READS_WITH_NO_CPG.intValue(), 1); + Assert.assertEquals(metrics.READS_IGNORED_SHORT.intValue(), 1); + Assert.assertEquals(metrics.READS_IGNORED_MISMATCHES.intValue(), 1); + } + + @Test + public void testRrbsCpgDetailMetrics() throws Exception { + Assert.assertEquals( + runPicardCommandLine( + makeArgList(CHR_M_SAM, null, detailOutput.getAbsolutePath(), pdf.getAbsolutePath(), summaryOutput.getAbsolutePath(), CHR_M_REFERENCE) + ), + 0 + ); + + final RrbsCpgDetailMetrics metricsCpg = getMultilevelMetrics(detailOutput); + Assert.assertEquals(metricsCpg.SEQUENCE_NAME, "chrM"); + Assert.assertEquals(metricsCpg.POSITION.intValue(), 60); + Assert.assertEquals(metricsCpg.TOTAL_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.CONVERTED_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.PCT_CONVERTED.intValue(), 1); + } + + @Test + public void testUnmappedReads() throws Exception { + Assert.assertEquals( + runPicardCommandLine( + makeArgList(CHR_M_SAM_TEST_UNMAPPED, null, detailOutput.getAbsolutePath(), pdf.getAbsolutePath(), 
summaryOutput.getAbsolutePath(), CHR_M_REFERENCE_TEST_UNMAPPED) + ), + 0 + ); + + final MetricsFile metricsFile = new MetricsFile(); + metricsFile.read(new FileReader(detailOutput)); + + Assert.assertEquals(metricsFile.getMetrics().size(), 2); // this metric skips unmapped reads, so we get 2 reads instead of 3 + } + + @Test + public void testRrbsCpgDetailMetricsByPrefix() throws Exception { + Assert.assertEquals( + runPicardCommandLine( + makeArgList(CHR_M_SAM, prefix, null, null, null, CHR_M_REFERENCE) + ), + 0 + ); + + final String output = prefix + CollectRrbsMetrics.DETAIL_FILE_EXTENSION; + final RrbsCpgDetailMetrics metricsCpg = getMultilevelMetrics(output); + Assert.assertEquals(metricsCpg.SEQUENCE_NAME, "chrM"); + Assert.assertEquals(metricsCpg.POSITION.intValue(), 60); + Assert.assertEquals(metricsCpg.TOTAL_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.CONVERTED_SITES.intValue(), 1); + Assert.assertEquals(metricsCpg.PCT_CONVERTED.intValue(), 1); + } + + private T getMultilevelMetrics(final File file) throws FileNotFoundException { + final MetricsFile metricsFile = new MetricsFile(); + metricsFile.read(new FileReader(file)); + return metricsFile.getMetrics().get(0); + } + + private T getMultilevelMetrics(final String fileName) throws FileNotFoundException { + return getMultilevelMetrics(new File(fileName)); + } + + @Test(dataProvider = "incorrectArguments", expectedExceptions = IllegalArgumentException.class) + public void checkingArguments(final String prefix, final String detailOutput, final String chartOutput, final String summaryOutput) { + runPicardCommandLine(makeArgList(CHR_M_SAM, prefix, detailOutput, chartOutput, summaryOutput, CHR_M_REFERENCE)); + } + + @DataProvider(name = "incorrectArguments") + public Object[][] makeIncorrectArguments() { + return new Object[][]{ + // METRICS_FILE_PREFIX OUTPUT CHART_OUTPUT SUMMARY_OUTPUT + {prefix, detailOutput, null, null}, + {prefix, null, pdf, null}, + {prefix, null, null, summaryOutput}, + {null, 
detailOutput, pdf, null}, + {null, detailOutput, null, summaryOutput}, + {null, null, pdf, summaryOutput}, + }; + } + + private List makeArgList(final String input, final String prefix, + final String detailOutput, final String chartOutput, final String summaryOutput, + final String referenceSequence) { + List args = new ArrayList<>(); + if (input != null) { + args.add("INPUT=" + input); + } + if (prefix != null) { + args.add("METRICS_FILE_PREFIX=" + prefix); + } + if (detailOutput != null) { + args.add("OUTPUT=" + detailOutput); + } + if (chartOutput != null) { + args.add("CHART_OUTPUT=" + chartOutput); + } + if (summaryOutput != null) { + args.add("SUMMARY_OUTPUT=" + summaryOutput); + } + if (referenceSequence != null) { + args.add("R=" + referenceSequence); + } + return args; + } +} \ No newline at end of file diff --git a/src/test/java/picard/metrics/CollectRrbsMetricsTest.java b/src/test/java/picard/metrics/CollectRrbsMetricsTest.java deleted file mode 100644 index 0f254d1b38..0000000000 --- a/src/test/java/picard/metrics/CollectRrbsMetricsTest.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * The MIT License - * - * Copyright (c) 2013 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -package picard.metrics; - -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.metrics.MetricsFile; -import org.testng.Assert; -import org.testng.annotations.AfterTest; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; -import picard.analysis.CollectRrbsMetrics; -import picard.analysis.RrbsSummaryMetrics; - -import java.io.File; -import java.io.FileReader; -import java.lang.Exception;import java.lang.Integer;import java.lang.String;import java.util.ArrayList; -import java.util.List; - -/** - * @author jgentry@broadinstitute.org - */ - -public class CollectRrbsMetricsTest { - public static final String CHR_M_SAM = "testdata/picard/metrics/chrMReads.sam"; - public static final String CHR_M_REFERENCE ="testdata/picard/metrics/chrM.reference.fasta"; - - private File rootTestDir; - - @BeforeTest - private void setUp() throws Exception { - rootTestDir = File.createTempFile("crmt.", ".tmp"); - Assert.assertTrue(rootTestDir.delete()); - Assert.assertTrue(rootTestDir.mkdir()); - } - - @AfterTest - private void tearDown() { - IOUtil.deleteDirectoryTree(rootTestDir); - } - - @Test - public void chrMReads() throws Exception { - final MetricsFile metricsFile = getSummaryFile(CHR_M_SAM, CHR_M_REFERENCE, rootTestDir + "/READ_TEST", new ArrayList()); - final RrbsSummaryMetrics metrics = metricsFile.getMetrics().get(0); - Assert.assertEquals(metrics.READS_ALIGNED.intValue(), 5); - Assert.assertEquals(metrics.NON_CPG_BASES.intValue(), 15); - Assert.assertEquals(metrics.NON_CPG_CONVERTED_BASES.intValue(), 11); - Assert.assertEquals(metrics.PCT_NON_CPG_BASES_CONVERTED, 0.733333); - Assert.assertEquals(metrics.CPG_BASES_SEEN.intValue(), 5); - 
Assert.assertEquals(metrics.CPG_BASES_CONVERTED.intValue(), 1); - Assert.assertEquals(metrics.PCT_CPG_BASES_CONVERTED, 0.2); - Assert.assertEquals(metrics.MEAN_CPG_COVERAGE, 1.666667); - Assert.assertEquals(metrics.MEDIAN_CPG_COVERAGE.intValue(), 2); - Assert.assertEquals(metrics.READS_WITH_NO_CPG.intValue(), 1); - Assert.assertEquals(metrics.READS_IGNORED_SHORT.intValue(), 1); - Assert.assertEquals(metrics.READS_IGNORED_MISMATCHES.intValue(), 1); - } - - private MetricsFile getSummaryFile(final String input, final String reference, final String prefix, - final List sequences) throws Exception { - final List argList = new ArrayList(); - argList.add("INPUT=" + input); - argList.add("METRICS_FILE_PREFIX=" + prefix); - argList.add("REFERENCE=" + reference); - for (final String sequence : sequences) { - argList.add("SEQUENCE_NAMES=" + sequence); - } - - final String[] args = new String[argList.size()]; - argList.toArray(args); - - Assert.assertEquals(new CollectRrbsMetrics().instanceMain(args), 0); - - final MetricsFile retVal = new MetricsFile(); - retVal.read(new FileReader(prefix + ".rrbs_summary_metrics")); - return retVal; - } - - -} diff --git a/testdata/picard/sam/collect_rrbs_metrics_test_unmapped.sam b/testdata/picard/sam/collect_rrbs_metrics_test_unmapped.sam new file mode 100644 index 0000000000..990c703cd3 --- /dev/null +++ b/testdata/picard/sam/collect_rrbs_metrics_test_unmapped.sam @@ -0,0 +1,10 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr2 LN:101 +@SQ SN:chr3 LN:101 +@SQ SN:chr4 LN:101 +@SQ SN:chr5 LN:101 +@RG ID:0 SM:Hi,Daddy! 
LB:whatever PU:me PL:ILLUMINA +SL-XAV:1:1:0:1914#0/2 157 * 0 0 * * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0 +SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0 +SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0