From b98d2c2a8690ede3bc8df69f2b300091520e8135 Mon Sep 17 00:00:00 2001 From: lichtens Date: Wed, 15 Aug 2018 09:50:20 -0400 Subject: [PATCH 1/4] Addresses #5114 --- .../somatic/cnv_somatic_pair_workflow.wdl | 1 + .../copynumber/CallCopyRatioSegments.java | 7 +- .../CalledLegacySegmentCollection.java | 107 ++++++++++++++++++ .../formats/records/CalledLegacySegment.java | 46 ++++++++ .../CallCopyRatioSegmentsIntegrationTest.java | 9 ++ 5 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java diff --git a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl index 833563115a5..2fc6e64d403 100644 --- a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl +++ b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl @@ -673,6 +673,7 @@ task CallCopyRatioSegments { output { File called_copy_ratio_segments = "${entity_id}.called.seg" + File called_copy_ratio_legacy_segments = "${entity_id}.called.seg.igv.seg" } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java index 6073270d213..bba4b98960b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java @@ -9,6 +9,7 @@ import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; import org.broadinstitute.hellbender.tools.copynumber.caller.SimpleCopyRatioCaller; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection; +import 
org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection; import org.broadinstitute.hellbender.utils.Utils; @@ -75,7 +76,7 @@ public final class CallCopyRatioSegments extends CommandLineProgram { public static final String NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-upper-bound"; public static final String OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "outlier-neutral-segment-copy-ratio-z-score-threshold"; public static final String CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "calling-copy-ratio-z-score-threshold"; - + public static final String IGV_COMPATIBLE_FILE_SUFFIX = ".igv.seg"; @Argument( doc = "Input file containing copy-ratio segments (.cr.seg output of ModelSegments).", fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, @@ -138,6 +139,10 @@ protected Object doWork() { .makeCalls(); calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile); + // Write an IGV compatible collection + final CalledLegacySegmentCollection legacySegmentCollection = new CalledLegacySegmentCollection(calledCopyRatioSegments); + legacySegmentCollection.write(new File(outputCalledCopyRatioSegmentsFile.getAbsolutePath() + IGV_COMPATIBLE_FILE_SUFFIX)); + return "SUCCESS"; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java new file mode 100644 index 00000000000..f33800c2957 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java @@ -0,0 +1,107 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.collections; + +import org.broadinstitute.hellbender.exceptions.UserException; +import 
org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.tsv.DataLine; +import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Represents a CBS-style segmentation to enable IGV-compatible plotting. + * + * IGV ignores column headers and requires that no other headers are present. + * We use the conventional CBS-style column headers (which include the sample name) + * and suppress the SAM-style metadata header (which breaks the contract for construction from input files). + * See + * http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS + * and + * https://software.broadinstitute.org/software/igv/SEG. 
+ * + */ +public final class CalledLegacySegmentCollection extends AbstractSampleLocatableCollection { + //note to developers: repeat the column headers in Javadoc so that they are viewable when linked + /** + * Sample, Chromosome, Start, End, Num_Probes, Call, Segment_Mean + */ + enum CalledLegacySegmentTableColumn { + SAMPLE("Sample"), + CHROMOSOME("Chromosome"), + START("Start"), + END("End"), + NUM_PROBES("Num_Probes"), + CALL("Call"), + SEGMENT_MEAN("Segment_Mean"); + + private final String columnName; + + CalledLegacySegmentTableColumn(final String columnName) { + this.columnName = columnName; + } + + static final TableColumnCollection COLUMNS = new TableColumnCollection( + Stream.of(values()).map(c -> c.columnName).toArray()); + } + + private static final Function LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION = dataLine -> { + final String sampleName = dataLine.get(CalledLegacySegmentTableColumn.SAMPLE.columnName); + final String contig = dataLine.get(CalledLegacySegmentTableColumn.CHROMOSOME.columnName); + final int start = dataLine.getInt(CalledLegacySegmentTableColumn.START.columnName); + final int end = dataLine.getInt(CalledLegacySegmentTableColumn.END.columnName); + final int numProbes = dataLine.getInt(CalledLegacySegmentTableColumn.NUM_PROBES.columnName); + final double segmentMean = dataLine.getDouble(CalledLegacySegmentTableColumn.SEGMENT_MEAN.columnName); + final String callOutputString = dataLine.get(CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn.CALL); + final CalledCopyRatioSegment.Call call = Arrays.stream(CalledCopyRatioSegment.Call.values()) + .filter(c -> c.getOutputString().equals(callOutputString)).findFirst().orElse(null);; + final SimpleInterval interval = new SimpleInterval(contig, start, end); + return new CalledLegacySegment(sampleName, interval, numProbes, segmentMean, call); + }; + + private static final BiConsumer LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER = (calledLegacySegment, dataLine) -> + 
dataLine.append(calledLegacySegment.getSampleName()) + .append(calledLegacySegment.getContig()) + .append(calledLegacySegment.getStart()) + .append(calledLegacySegment.getEnd()) + .append(calledLegacySegment.getNumProbes()) + .append(calledLegacySegment.getCall().getOutputString()) + .append(formatDouble(calledLegacySegment.getSegmentMean())); + + public CalledLegacySegmentCollection(final SampleLocatableMetadata metadata, + final List LegacySegments) { + super(metadata, LegacySegments, CalledLegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); + } + + public CalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection collection) { + this(collection.getMetadata(), collection.getRecords().stream() + .map(r -> convert(r, collection.getMetadata().getSampleName())).collect(Collectors.toList())); + } + + public CalledLegacySegmentCollection(final File inputFile) { + super(inputFile, CopyRatioSegmentCollection.CopyRatioSegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); + } + + private static CalledLegacySegment convert(final CalledCopyRatioSegment seg, final String sampleName) { + return new CalledLegacySegment(sampleName, seg.getInterval(), seg.getNumPoints(), seg.getMeanLog2CopyRatio(), seg.getCall()); + } + + // output of SAM-style header is suppressed + @Override + public void write(final File outputFile) { + try (final RecordWriter recordWriter = new RecordWriter(new FileWriter(outputFile, true))) { + recordWriter.writeAllRecords(getRecords()); + } catch (final IOException e) { + throw new UserException.CouldNotCreateOutputFile(outputFile, e); + } + } +} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java new file mode 
100644 index 00000000000..f9196aed004 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java @@ -0,0 +1,46 @@ +package org.broadinstitute.hellbender.tools.copynumber.formats.records; + +import org.broadinstitute.hellbender.utils.SimpleInterval; + +public class CalledLegacySegment extends LegacySegment { + + private CalledCopyRatioSegment.Call call; + + public CalledLegacySegment(final String sampleName, final SimpleInterval interval, final int numProbes, final double segmentMean, + final CalledCopyRatioSegment.Call call) { + super(sampleName, interval, numProbes, segmentMean); + this.call = call; + } + + public CalledCopyRatioSegment.Call getCall() { + return call; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + + CalledLegacySegment that = (CalledLegacySegment) o; + + return call == that.call; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + (call != null ? 
call.hashCode() : 0); + return result; + } + + @Override + public String toString() { + return "CalledLegacyCopyRatioSegment{" + + "interval=" + getInterval() + + ", numPoints=" + getNumProbes() + + ", meanLog2CopyRatio=" + getSegmentMean() + + ", call=" + call + + '}'; + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java index 1ebaa17da3d..48c16673ecd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java @@ -4,6 +4,7 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; import org.testng.Assert; import org.testng.annotations.Test; @@ -29,5 +30,13 @@ public void testCallSegments() { Assert.assertEquals(calledCopyRatioSegments.getMetadata(), copyRatioSegments.getMetadata()); Assert.assertEquals(calledCopyRatioSegments.getIntervals(), copyRatioSegments.getIntervals()); Assert.assertEquals(calledCopyRatioSegments.getRecords().stream().map(s -> s.getCall().getOutputString()).toArray(), new String[] {"+", "-", "0", "0"}); + + // Test writing the legacy format. Note that reading cannot be done through the CNV tools, since the header has been stripped away. 
+ final File legacySegmentFile = new File(outputFile.getAbsolutePath() + CallCopyRatioSegments.IGV_COMPATIBLE_FILE_SUFFIX); + Assert.assertTrue(legacySegmentFile.exists()); + Assert.assertTrue(legacySegmentFile.length() > 0); + + final AnnotatedIntervalCollection annotatedIntervalCollection = AnnotatedIntervalCollection.create(legacySegmentFile.toPath(), null); + Assert.assertEquals(annotatedIntervalCollection.getRecords().size(), 4); } } From 4c59eecfac16024dc21cb8dbee779fba4854cc67 Mon Sep 17 00:00:00 2001 From: lichtens Date: Wed, 15 Aug 2018 09:52:03 -0400 Subject: [PATCH 2/4] Exposing outputs at the workflow level. --- scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl index 2fc6e64d403..a0d241d314c 100644 --- a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl +++ b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl @@ -446,6 +446,7 @@ workflow CNVSomaticPairWorkflow { File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments + File called_copy_ratio_segments_legacy_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD @@ -472,6 +473,7 @@ workflow CNVSomaticPairWorkflow { File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters File? 
called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments + File? called_copy_ratio_segments_legacy_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD From 11d437064e075709b6737734d1736924ec4d213b Mon Sep 17 00:00:00 2001 From: lichtens Date: Tue, 28 Aug 2018 10:12:11 -0400 Subject: [PATCH 3/4] Fixed a rebase issue. --- .../tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java index 48c16673ecd..8c75ba709ef 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java @@ -1,9 +1,9 @@ package org.broadinstitute.hellbender.tools.copynumber; import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection; -import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; import org.testng.Assert; import org.testng.annotations.Test; From b5dab6cffb1f958e492ce19d57343c87f926a294 Mon Sep 17 00:00:00 2001 From: lichtens Date: Tue, 28 Aug 
2018 11:01:34 -0400 Subject: [PATCH 4/4] Answering PR comments --- .../somatic/cnv_somatic_pair_workflow.wdl | 6 ++-- .../copynumber/CallCopyRatioSegments.java | 26 +++++++++++++-- .../CalledLegacySegmentCollection.java | 27 +++++++-------- .../collections/LegacySegmentCollection.java | 4 +-- .../formats/records/CalledLegacySegment.java | 33 +++++++++++-------- .../CallCopyRatioSegmentsIntegrationTest.java | 2 +- 6 files changed, 60 insertions(+), 38 deletions(-) diff --git a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl index a0d241d314c..1c837b006b6 100644 --- a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl +++ b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl @@ -446,7 +446,7 @@ workflow CNVSomaticPairWorkflow { File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments - File called_copy_ratio_segments_legacy_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments + File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD @@ -473,7 +473,7 @@ workflow CNVSomaticPairWorkflow { File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments - File? called_copy_ratio_segments_legacy_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments + File? 
called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD @@ -675,7 +675,7 @@ task CallCopyRatioSegments { output { File called_copy_ratio_segments = "${entity_id}.called.seg" - File called_copy_ratio_legacy_segments = "${entity_id}.called.seg.igv.seg" + File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg" } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java index bba4b98960b..0cdda7c66ff 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java @@ -1,5 +1,7 @@ package org.broadinstitute.hellbender.tools.copynumber; +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FilenameUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -11,9 +13,12 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment; +import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment; import 
org.broadinstitute.hellbender.utils.Utils; import java.io.File; +import java.util.stream.Collectors; /** * Calls copy-ratio segments as amplified, deleted, or copy-number neutral. @@ -76,7 +81,7 @@ public final class CallCopyRatioSegments extends CommandLineProgram { public static final String NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-upper-bound"; public static final String OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "outlier-neutral-segment-copy-ratio-z-score-threshold"; public static final String CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "calling-copy-ratio-z-score-threshold"; - public static final String IGV_COMPATIBLE_FILE_SUFFIX = ".igv.seg"; + public static final String LEGACY_SEGMENTS_FILE_SUFFIX = ".igv.seg"; @Argument( doc = "Input file containing copy-ratio segments (.cr.seg output of ModelSegments).", fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, @@ -140,9 +145,24 @@ protected Object doWork() { calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile); // Write an IGV compatible collection - final CalledLegacySegmentCollection legacySegmentCollection = new CalledLegacySegmentCollection(calledCopyRatioSegments); - legacySegmentCollection.write(new File(outputCalledCopyRatioSegmentsFile.getAbsolutePath() + IGV_COMPATIBLE_FILE_SUFFIX)); + final CalledLegacySegmentCollection legacySegmentCollection = createCalledLegacySegmentCollection(calledCopyRatioSegments); + legacySegmentCollection.write(createCalledLegacyOutputFilename(outputCalledCopyRatioSegmentsFile)); return "SUCCESS"; } + + @VisibleForTesting + public static File createCalledLegacyOutputFilename(final File calledCopyRatioBaseFilename) { + return new File(FilenameUtils.removeExtension(calledCopyRatioBaseFilename.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX); + } + + private static CalledLegacySegmentCollection createCalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection segments) { + return new 
CalledLegacySegmentCollection(segments.getMetadata(), segments.getRecords().stream() + .map(r -> convert(r, segments.getMetadata().getSampleName())).collect(Collectors.toList())); + } + + private static CalledLegacySegment convert(final CalledCopyRatioSegment segment, final String sampleName) { + return new CalledLegacySegment(sampleName, segment.getInterval(), segment.getNumPoints(), segment.getMeanLog2CopyRatio(), segment.getCall()); + } + } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java index f33800c2957..ddaf921d132 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.collections; +import org.apache.commons.lang3.StringUtils; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment; @@ -15,7 +16,6 @@ import java.util.List; import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -28,7 +28,6 @@ * http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS * and * https://software.broadinstitute.org/software/igv/SEG. 
- * */ public final class CalledLegacySegmentCollection extends AbstractSampleLocatableCollection { //note to developers: repeat the column headers in Javadoc so that they are viewable when linked @@ -54,7 +53,7 @@ enum CalledLegacySegmentTableColumn { Stream.of(values()).map(c -> c.columnName).toArray()); } - private static final Function LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION = dataLine -> { + private static final Function CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION = dataLine -> { final String sampleName = dataLine.get(CalledLegacySegmentTableColumn.SAMPLE.columnName); final String contig = dataLine.get(CalledLegacySegmentTableColumn.CHROMOSOME.columnName); final int start = dataLine.getInt(CalledLegacySegmentTableColumn.START.columnName); @@ -63,12 +62,16 @@ enum CalledLegacySegmentTableColumn { final double segmentMean = dataLine.getDouble(CalledLegacySegmentTableColumn.SEGMENT_MEAN.columnName); final String callOutputString = dataLine.get(CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn.CALL); final CalledCopyRatioSegment.Call call = Arrays.stream(CalledCopyRatioSegment.Call.values()) - .filter(c -> c.getOutputString().equals(callOutputString)).findFirst().orElse(null);; + .filter(c -> c.getOutputString().equals(callOutputString)).findFirst().orElseThrow( + () -> new UserException("Attempting to read an invalid value for " + + CalledLegacySegmentTableColumn.CALL +": " + callOutputString + + ". 
Valid values are " + StringUtils.join(CalledCopyRatioSegment.Call.values(), ", ") + )); final SimpleInterval interval = new SimpleInterval(contig, start, end); return new CalledLegacySegment(sampleName, interval, numProbes, segmentMean, call); }; - private static final BiConsumer LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER = (calledLegacySegment, dataLine) -> + private static final BiConsumer CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER = (calledLegacySegment, dataLine) -> dataLine.append(calledLegacySegment.getSampleName()) .append(calledLegacySegment.getContig()) .append(calledLegacySegment.getStart()) @@ -78,21 +81,13 @@ enum CalledLegacySegmentTableColumn { .append(formatDouble(calledLegacySegment.getSegmentMean())); public CalledLegacySegmentCollection(final SampleLocatableMetadata metadata, - final List LegacySegments) { - super(metadata, LegacySegments, CalledLegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); + final List calledLegacySegments) { + super(metadata, calledLegacySegments, CalledLegacySegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); } - public CalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection collection) { - this(collection.getMetadata(), collection.getRecords().stream() - .map(r -> convert(r, collection.getMetadata().getSampleName())).collect(Collectors.toList())); - } public CalledLegacySegmentCollection(final File inputFile) { - super(inputFile, CopyRatioSegmentCollection.CopyRatioSegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); - } - - private static CalledLegacySegment convert(final CalledCopyRatioSegment seg, final String sampleName) { - return new CalledLegacySegment(sampleName, seg.getInterval(), seg.getNumPoints(), seg.getMeanLog2CopyRatio(), seg.getCall()); + 
super(inputFile, CopyRatioSegmentCollection.CopyRatioSegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); } // output of SAM-style header is suppressed diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java index 570c97441b3..e1f733a5116 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java @@ -71,8 +71,8 @@ enum LegacySegmentTableColumn { .append(formatDouble(LegacySegment.getSegmentMean())); public LegacySegmentCollection(final SampleLocatableMetadata metadata, - final List LegacySegments) { - super(metadata, LegacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); + final List legacySegments) { + super(metadata, legacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER); } // output of SAM-style header is suppressed diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java index f9196aed004..07bf149e72c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/CalledLegacySegment.java @@ -1,14 +1,17 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.records; import org.broadinstitute.hellbender.utils.SimpleInterval; +import 
org.broadinstitute.hellbender.utils.Utils; public class CalledLegacySegment extends LegacySegment { - private CalledCopyRatioSegment.Call call; + private final CalledCopyRatioSegment.Call call; - public CalledLegacySegment(final String sampleName, final SimpleInterval interval, final int numProbes, final double segmentMean, + public CalledLegacySegment(final String sampleName, final SimpleInterval interval, final int numProbes, + final double segmentMean, final CalledCopyRatioSegment.Call call) { super(sampleName, interval, numProbes, segmentMean); + Utils.nonNull(call, "Cannot initialize a called legacy segment with a null call."); this.call = call; } @@ -17,26 +20,30 @@ public CalledCopyRatioSegment.Call getCall() { } @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; - - CalledLegacySegment that = (CalledLegacySegment) o; - + public final boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + final CalledLegacySegment that = (CalledLegacySegment) o; return call == that.call; } @Override - public int hashCode() { + public final int hashCode() { int result = super.hashCode(); - result = 31 * result + (call != null ? 
call.hashCode() : 0); + result = 31 * result + call.hashCode(); return result; } @Override - public String toString() { - return "CalledLegacyCopyRatioSegment{" + + public final String toString() { + return "CalledLegacySegment{" + "interval=" + getInterval() + ", numPoints=" + getNumProbes() + ", meanLog2CopyRatio=" + getSegmentMean() + diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java index 8c75ba709ef..bb0e7fcfbda 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java @@ -32,7 +32,7 @@ public void testCallSegments() { Assert.assertEquals(calledCopyRatioSegments.getRecords().stream().map(s -> s.getCall().getOutputString()).toArray(), new String[] {"+", "-", "0", "0"}); // Test writing the legacy format. Note that reading cannot be done through the CNV tools, since the header has been stripped away. - final File legacySegmentFile = new File(outputFile.getAbsolutePath() + CallCopyRatioSegments.IGV_COMPATIBLE_FILE_SUFFIX); + final File legacySegmentFile = CallCopyRatioSegments.createCalledLegacyOutputFilename(outputFile); Assert.assertTrue(legacySegmentFile.exists()); Assert.assertTrue(legacySegmentFile.length() > 0);