Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Called Legacy Segment code #5115

Merged
merged 4 commits into from
Aug 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ workflow CNVSomaticPairWorkflow {
File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters
File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters
File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments
File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments
File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot
File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot
File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD
Expand All @@ -472,6 +473,7 @@ workflow CNVSomaticPairWorkflow {
File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters
File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters
File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments
File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments
File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot
File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot
File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD
Expand Down Expand Up @@ -673,6 +675,7 @@ task CallCopyRatioSegments {

output {
File called_copy_ratio_segments = "${entity_id}.called.seg"
File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg"
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.broadinstitute.hellbender.tools.copynumber;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
Expand All @@ -9,10 +11,14 @@
import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup;
import org.broadinstitute.hellbender.tools.copynumber.caller.SimpleCopyRatioCaller;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.File;
import java.util.stream.Collectors;

/**
* Calls copy-ratio segments as amplified, deleted, or copy-number neutral.
Expand Down Expand Up @@ -75,7 +81,7 @@ public final class CallCopyRatioSegments extends CommandLineProgram {
public static final String NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-upper-bound";
public static final String OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "outlier-neutral-segment-copy-ratio-z-score-threshold";
public static final String CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "calling-copy-ratio-z-score-threshold";

public static final String LEGACY_SEGMENTS_FILE_SUFFIX = ".igv.seg";
@Argument(
doc = "Input file containing copy-ratio segments (.cr.seg output of ModelSegments).",
fullName = StandardArgumentDefinitions.INPUT_LONG_NAME,
Expand Down Expand Up @@ -138,6 +144,25 @@ protected Object doWork() {
.makeCalls();
calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile);

// Write an IGV compatible collection
final CalledLegacySegmentCollection legacySegmentCollection = createCalledLegacySegmentCollection(calledCopyRatioSegments);
legacySegmentCollection.write(createCalledLegacyOutputFilename(outputCalledCopyRatioSegmentsFile));

return "SUCCESS";
}

@VisibleForTesting
public static File createCalledLegacyOutputFilename(final File calledCopyRatioBaseFilename) {
return new File(FilenameUtils.removeExtension(calledCopyRatioBaseFilename.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX);
}

private static CalledLegacySegmentCollection createCalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection segments) {
return new CalledLegacySegmentCollection(segments.getMetadata(), segments.getRecords().stream()
.map(r -> convert(r, segments.getMetadata().getSampleName())).collect(Collectors.toList()));
}

private static CalledLegacySegment convert(final CalledCopyRatioSegment segment, final String sampleName) {
return new CalledLegacySegment(sampleName, segment.getInterval(), segment.getNumPoints(), segment.getMeanLog2CopyRatio(), segment.getCall());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.collections;

import org.apache.commons.lang3.StringUtils;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.Stream;

/**
* Represents a CBS-style segmentation to enable IGV-compatible plotting.
*
* IGV ignores column headers and requires that no other headers are present.
* We use the conventional CBS-style column headers (which includes the sample name)
* and suppress the SAM-style metadata header (which breaks the contract for construction from input files).
* See <a href="http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS">
* http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS</a>
* and <a href="https://software.broadinstitute.org/software/igv/SEG">
* https://software.broadinstitute.org/software/igv/SEG</a>.
*/
public final class CalledLegacySegmentCollection extends AbstractSampleLocatableCollection<CalledLegacySegment> {
//note to developers: repeat the column headers in Javadoc so that they are viewable when linked
/**
* Sample, Chromosome, Start, End, Num_Probes, Call, Segment_Mean
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the canonical order? I thought Call followed Segment_Mean in previous pipelines, since it was just appended to the CBS segment file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want IGV to be able to read it properly, the segment mean must be the last column. I think the legacy format was sub-optimal and people using IGV tended to grab the seg file before the call file was appended.

No action.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you're right. Sounds good.

*/
enum CalledLegacySegmentTableColumn {
SAMPLE("Sample"),
CHROMOSOME("Chromosome"),
START("Start"),
END("End"),
NUM_PROBES("Num_Probes"),
CALL("Call"),
SEGMENT_MEAN("Segment_Mean");

private final String columnName;

CalledLegacySegmentTableColumn(final String columnName) {
this.columnName = columnName;
}

static final TableColumnCollection COLUMNS = new TableColumnCollection(
Stream.of(values()).map(c -> c.columnName).toArray());
}

private static final Function<DataLine, CalledLegacySegment> CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION = dataLine -> {
final String sampleName = dataLine.get(CalledLegacySegmentTableColumn.SAMPLE.columnName);
final String contig = dataLine.get(CalledLegacySegmentTableColumn.CHROMOSOME.columnName);
final int start = dataLine.getInt(CalledLegacySegmentTableColumn.START.columnName);
final int end = dataLine.getInt(CalledLegacySegmentTableColumn.END.columnName);
final int numProbes = dataLine.getInt(CalledLegacySegmentTableColumn.NUM_PROBES.columnName);
final double segmentMean = dataLine.getDouble(CalledLegacySegmentTableColumn.SEGMENT_MEAN.columnName);
final String callOutputString = dataLine.get(CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn.CALL);
final CalledCopyRatioSegment.Call call = Arrays.stream(CalledCopyRatioSegment.Call.values())
.filter(c -> c.getOutputString().equals(callOutputString)).findFirst().orElseThrow(
() -> new UserException("Attempting to read an invalid value for " +
CalledLegacySegmentTableColumn.CALL +": " + callOutputString +
". Valid values are " + StringUtils.join(CalledCopyRatioSegment.Call.values(), ", ")
));
final SimpleInterval interval = new SimpleInterval(contig, start, end);
return new CalledLegacySegment(sampleName, interval, numProbes, segmentMean, call);
};

private static final BiConsumer<CalledLegacySegment, DataLine> CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER = (calledLegacySegment, dataLine) ->
dataLine.append(calledLegacySegment.getSampleName())
.append(calledLegacySegment.getContig())
.append(calledLegacySegment.getStart())
.append(calledLegacySegment.getEnd())
.append(calledLegacySegment.getNumProbes())
.append(calledLegacySegment.getCall().getOutputString())
.append(formatDouble(calledLegacySegment.getSegmentMean()));

public CalledLegacySegmentCollection(final SampleLocatableMetadata metadata,
final List<CalledLegacySegment> calledLegacySegments) {
super(metadata, calledLegacySegments, CalledLegacySegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}


public CalledLegacySegmentCollection(final File inputFile) {
super(inputFile, CopyRatioSegmentCollection.CopyRatioSegmentTableColumn.COLUMNS, CALLED_LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, CALLED_LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}

// output of SAM-style header is suppressed
@Override
public void write(final File outputFile) {
try (final RecordWriter recordWriter = new RecordWriter(new FileWriter(outputFile, true))) {
recordWriter.writeAllRecords(getRecords());
} catch (final IOException e) {
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ enum LegacySegmentTableColumn {
.append(formatDouble(LegacySegment.getSegmentMean()));

public LegacySegmentCollection(final SampleLocatableMetadata metadata,
final List<LegacySegment> LegacySegments) {
super(metadata, LegacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
final List<LegacySegment> legacySegments) {
super(metadata, legacySegments, LegacySegmentTableColumn.COLUMNS, LEGACY_SEGMENT_DATA_LINE_TO_RECORD_FUNCTION, LEGACY_SEGMENT_RECORD_AND_DATA_LINE_BI_CONSUMER);
}

// output of SAM-style header is suppressed
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.broadinstitute.hellbender.tools.copynumber.formats.records;

import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

public class CalledLegacySegment extends LegacySegment {

private final CalledCopyRatioSegment.Call call;

public CalledLegacySegment(final String sampleName, final SimpleInterval interval, final int numProbes,
final double segmentMean,
final CalledCopyRatioSegment.Call call) {
super(sampleName, interval, numProbes, segmentMean);
Utils.nonNull(call, "Cannot initialize a called legacy segment with a null call.");
this.call = call;
}

public CalledCopyRatioSegment.Call getCall() {
return call;
}

@Override
public final boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
final CalledLegacySegment that = (CalledLegacySegment) o;
return call == that.call;
}

@Override
public final int hashCode() {
int result = super.hashCode();
result = 31 * result + call.hashCode();
return result;
}

@Override
public final String toString() {
return "CalledLegacySegment{" +
"interval=" + getInterval() +
", numPoints=" + getNumProbes() +
", meanLog2CopyRatio=" + getSegmentMean() +
", call=" + call +
'}';
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package org.broadinstitute.hellbender.tools.copynumber;

import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection;
import org.testng.Assert;
import org.testng.annotations.Test;

Expand All @@ -29,5 +30,13 @@ public void testCallSegments() {
Assert.assertEquals(calledCopyRatioSegments.getMetadata(), copyRatioSegments.getMetadata());
Assert.assertEquals(calledCopyRatioSegments.getIntervals(), copyRatioSegments.getIntervals());
Assert.assertEquals(calledCopyRatioSegments.getRecords().stream().map(s -> s.getCall().getOutputString()).toArray(), new String[] {"+", "-", "0", "0"});

// Test writing the legacy format. Note that reading cannot be done through the CNV tools, since the header has been stripped away.
final File legacySegmentFile = CallCopyRatioSegments.createCalledLegacyOutputFilename(outputFile);
Assert.assertTrue(legacySegmentFile.exists());
Assert.assertTrue(legacySegmentFile.length() > 0);

final AnnotatedIntervalCollection annotatedIntervalCollection = AnnotatedIntervalCollection.create(legacySegmentFile.toPath(), null);
Assert.assertEquals(annotatedIntervalCollection.getRecords().size(), 4);
}
}