Skip to content

Commit

Permalink
Updated plotting for ModelSegments CNV pipeline. (#3729)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelklee authored and jonn-smith committed Nov 27, 2017
1 parent 4695e6e commit 066d972
Show file tree
Hide file tree
Showing 28 changed files with 30,989 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package org.broadinstitute.hellbender.tools.copynumber.coverage.copyratio;

import htsjdk.samtools.util.Locatable;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;

/**
 * A log2 copy-ratio value attached to a genomic interval.
 *
 * <p>Instances are immutable. Two instances are equal when both their intervals and their
 * log2 copy-ratio values (compared with {@link Double#compare(double, double)}) are equal.</p>
 */
public class CopyRatio implements Locatable {
    private final SimpleInterval interval;
    private final double log2CopyRatioValue;

    /**
     * @param interval              interval the value applies to; checked with {@code Utils.nonNull}
     * @param log2CopyRatioValue    log2 copy-ratio value; stored as given, without validation
     */
    public CopyRatio(final SimpleInterval interval,
                     final double log2CopyRatioValue) {
        Utils.nonNull(interval);
        this.interval = interval;
        this.log2CopyRatioValue = log2CopyRatioValue;
    }

    /** @return the contig of the underlying interval */
    @Override
    public String getContig() {
        return interval.getContig();
    }

    /** @return the start of the underlying interval */
    @Override
    public int getStart() {
        return interval.getStart();
    }

    /** @return the end of the underlying interval */
    @Override
    public int getEnd() {
        return interval.getEnd();
    }

    /** @return the interval passed at construction */
    public SimpleInterval getInterval() {
        return interval;
    }

    /** @return the log2 copy-ratio value passed at construction */
    public double getLog2CopyRatioValue() {
        return log2CopyRatioValue;
    }

    /**
     * The midpoint is used to characterize the interval for the purposes of determining overlaps when combining
     * copy-ratio and allele-fraction segmentations, so that each copy-ratio interval will be uniquely contained
     * in a single segment.
     *
     * @return a single-position interval on the same contig, located at the midpoint of this interval
     *         (rounded down when the sum of start and end is odd)
     */
    public SimpleInterval getMidpoint() {
        final int start = getStart();
        // start + (end - start) / 2 gives the same result as (start + end) / 2 whenever the sum fits in an int,
        // but cannot overflow for large coordinates.
        // NOTE(review): assumes 0 <= start <= end, which SimpleInterval presumably enforces -- confirm.
        final int midPoint = start + (getEnd() - start) / 2;
        return new SimpleInterval(interval.getContig(), midPoint, midPoint);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final CopyRatio copyRatio = (CopyRatio) o;
        return Double.compare(copyRatio.log2CopyRatioValue, log2CopyRatioValue) == 0
                && interval.equals(copyRatio.interval);
    }

    @Override
    public int hashCode() {
        // Double.hashCode(d) is specified as (int) (bits ^ (bits >>> 32)) of doubleToLongBits(d),
        // so this yields exactly the same hash values as the hand-rolled form.
        return 31 * interval.hashCode() + Double.hashCode(log2CopyRatioValue);
    }

    @Override
    public String toString() {
        return "CopyRatio{" +
                "interval=" + interval +
                ", log2CopyRatioValue=" + log2CopyRatioValue +
                '}';
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.broadinstitute.hellbender.tools.copynumber.coverage.copyratio;

import htsjdk.samtools.util.OverlapDetector;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SampleLocatableCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleMetadata;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;

import java.io.File;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * A sample-level collection of {@link CopyRatio} records, with a four-column table layout
 * (contig, start, end, log2 copy ratio).
 */
public final class CopyRatioCollection extends SampleLocatableCollection<CopyRatio> {
    /** Table columns, in the order in which they are written and read. */
    enum CopyRatioTableColumn {
        CONTIG,
        START,
        END,
        LOG2_COPY_RATIO;

        static final TableColumnCollection COLUMNS = new TableColumnCollection((Object[]) values());
    }

    /** Builds a {@link CopyRatio} from the four columns of a data line. */
    private static final Function<DataLine, CopyRatio> COPY_RATIO_RECORD_FROM_DATA_LINE_DECODER = line -> {
        // All four columns are read before any object is constructed.
        final String chr = line.get(CopyRatioTableColumn.CONTIG);
        final int first = line.getInt(CopyRatioTableColumn.START);
        final int last = line.getInt(CopyRatioTableColumn.END);
        final double log2Value = line.getDouble(CopyRatioTableColumn.LOG2_COPY_RATIO);
        return new CopyRatio(new SimpleInterval(chr, first, last), log2Value);
    };

    /** Appends the four columns of a {@link CopyRatio} to a data line, in column order. */
    private static final BiConsumer<CopyRatio, DataLine> COPY_RATIO_RECORD_TO_DATA_LINE_ENCODER = (record, line) -> {
        final SimpleInterval recordInterval = record.getInterval();
        line.append(recordInterval.getContig())
                .append(recordInterval.getStart())
                .append(recordInterval.getEnd())
                .append(record.getLog2CopyRatioValue());
    };

    /**
     * @param inputFile file handed to the superclass together with the column layout and decoder/encoder
     */
    public CopyRatioCollection(final File inputFile) {
        super(inputFile,
                CopyRatioTableColumn.COLUMNS,
                COPY_RATIO_RECORD_FROM_DATA_LINE_DECODER,
                COPY_RATIO_RECORD_TO_DATA_LINE_ENCODER);
    }

    /**
     * @param sampleMetadata    metadata handed to the superclass
     * @param copyRatios        records handed to the superclass
     */
    public CopyRatioCollection(final SampleMetadata sampleMetadata,
                               final List<CopyRatio> copyRatios) {
        super(sampleMetadata,
                copyRatios,
                CopyRatioTableColumn.COLUMNS,
                COPY_RATIO_RECORD_FROM_DATA_LINE_DECODER,
                COPY_RATIO_RECORD_TO_DATA_LINE_ENCODER);
    }

    /**
     * @return the log2 copy-ratio value of every record, in record order
     */
    public List<Double> getLog2CopyRatioValues() {
        return getRecords().stream()
                .map(record -> record.getLog2CopyRatioValue())
                .collect(Collectors.toList());
    }

    /**
     * The midpoint is used to characterize the interval for the purposes of determining overlaps when combining
     * copy-ratio and allele-fraction segmentations, so that each copy-ratio interval will be uniquely contained
     * in a single segment.
     *
     * @return an overlap detector over copies of the records whose intervals are collapsed to their midpoints
     */
    public OverlapDetector<CopyRatio> getMidpointOverlapDetector() {
        final List<CopyRatio> midpointRecords = getRecords().stream()
                .map(record -> new CopyRatio(record.getMidpoint(), record.getLog2CopyRatioValue()))
                .collect(Collectors.toList());
        return OverlapDetector.create(midpointRecords);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.broadinstitute.hellbender.tools.copynumber.formats;

/**
 * Holder for shared string constants, declared in long-name/short-name pairs.
 *
 * <p>NOTE(review): the {@code _LONG_NAME}/{@code _SHORT_NAME} suffixes suggest these are
 * command-line argument names shared across the copy-number tools -- confirm against callers.</p>
 *
 * <p>This class only holds constants and is not meant to be instantiated.</p>
 */
public final class CopyNumberStandardArgument {
    public static final String ANNOTATED_INTERVALS_FILE_LONG_NAME = "annotatedIntervals";
    public static final String ANNOTATED_INTERVALS_FILE_SHORT_NAME = "annot";

    public static final String READ_COUNT_PANEL_OF_NORMALS_FILE_LONG_NAME = "readCountPanelOfNormals";
    public static final String READ_COUNT_PANEL_OF_NORMALS_FILE_SHORT_NAME = "rcPON";

    public static final String NUMBER_OF_EIGENSAMPLES_LONG_NAME = "numberOfEigensamples";
    public static final String NUMBER_OF_EIGENSAMPLES_SHORT_NAME = "numEigen";

    public static final String STANDARDIZED_COPY_RATIOS_FILE_LONG_NAME = "standardizedCopyRatios";
    public static final String STANDARDIZED_COPY_RATIOS_FILE_SHORT_NAME = "standardizedCR";

    public static final String DENOISED_COPY_RATIOS_FILE_LONG_NAME = "denoisedCopyRatios";
    public static final String DENOISED_COPY_RATIOS_FILE_SHORT_NAME = "denoisedCR";

    public static final String ALLELIC_COUNTS_FILE_LONG_NAME = "allelicCounts";
    public static final String ALLELIC_COUNTS_FILE_SHORT_NAME = "AC";

    public static final String NORMAL_ALLELIC_COUNTS_FILE_LONG_NAME = "normalAllelicCounts";
    public static final String NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME = "normalAC";

    public static final String SEGMENTS_FILE_LONG_NAME = "segments";
    public static final String SEGMENTS_FILE_SHORT_NAME = "S";

    public static final String OUTPUT_PREFIX_LONG_NAME = "outputPrefix";
    public static final String OUTPUT_PREFIX_SHORT_NAME = "pre";

    /** Prevents instantiation: this class exists only for its constants. */
    private CopyNumberStandardArgument() {
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import htsjdk.samtools.util.OverlapDetector;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleMetadata;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;
Expand Down Expand Up @@ -76,6 +77,16 @@ private static <T extends Locatable> void validateIntervals(final String sampleN
}
}

/**
 * Builds one {@link SimpleInterval} per record from the record's contig, start and end.
 *
 * @return a new modifiable list of {@link SimpleInterval}s corresponding to the {@link Locatable}s
 * for each record contained in the collection
 */
public List<SimpleInterval> getIntervals() {
    return getRecords().stream()
            .map(locatable -> {
                final String contig = locatable.getContig();
                final int start = locatable.getStart();
                final int end = locatable.getEnd();
                return new SimpleInterval(contig, start, end);
            })
            .collect(Collectors.toList());
}

/**
 * @return an {@link OverlapDetector} created from the records contained in the collection
 */
public OverlapDetector<RECORD> getOverlapDetector() {
    final List<RECORD> records = getRecords();
    return OverlapDetector.create(records);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package org.broadinstitute.hellbender.tools.copynumber.multidimensional.model;

import htsjdk.samtools.util.Locatable;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.mcmc.Decile;
import org.broadinstitute.hellbender.utils.mcmc.DecileCollection;

import java.util.List;

/**
 * A genomic segment together with the number of copy-ratio and allele-fraction points it contains
 * and simple posterior summaries (10th/50th/90th deciles) for log2 copy ratio and minor-allele fraction.
 */
public class ModeledSegment implements Locatable {
    private final SimpleInterval interval;
    private final int numPointsCopyRatio;
    private final int numPointsAlleleFraction;

    private final SimplePosteriorSummary log2CopyRatioSimplePosteriorSummary;
    private final SimplePosteriorSummary minorAlleleFractionSimplePosteriorSummary;

    /**
     * At least one of the two point counts must be positive; the interval and both summaries
     * are checked with {@code Utils.nonNull}.
     *
     * @param interval                                  interval covered by the segment
     * @param numPointsCopyRatio                        number of copy-ratio points in the segment
     * @param numPointsAlleleFraction                   number of allele-fraction points in the segment
     * @param log2CopyRatioSimplePosteriorSummary       posterior summary for log2 copy ratio
     * @param minorAlleleFractionSimplePosteriorSummary posterior summary for minor-allele fraction
     */
    public ModeledSegment(final SimpleInterval interval,
                          final int numPointsCopyRatio,
                          final int numPointsAlleleFraction,
                          final SimplePosteriorSummary log2CopyRatioSimplePosteriorSummary,
                          final SimplePosteriorSummary minorAlleleFractionSimplePosteriorSummary) {
        // The point-count check runs first, before any null check, as in the original ordering.
        final boolean hasAnyPoints = numPointsCopyRatio > 0 || numPointsAlleleFraction > 0;
        Utils.validateArg(hasAnyPoints,
                String.format("Number of copy-ratio points or number of allele-fraction points must be positive: %s", interval));

        final SimpleInterval checkedInterval = Utils.nonNull(interval);
        final SimplePosteriorSummary checkedLog2CopyRatio = Utils.nonNull(log2CopyRatioSimplePosteriorSummary);
        final SimplePosteriorSummary checkedMinorAlleleFraction = Utils.nonNull(minorAlleleFractionSimplePosteriorSummary);

        this.interval = checkedInterval;
        this.numPointsCopyRatio = numPointsCopyRatio;
        this.numPointsAlleleFraction = numPointsAlleleFraction;
        this.log2CopyRatioSimplePosteriorSummary = checkedLog2CopyRatio;
        this.minorAlleleFractionSimplePosteriorSummary = checkedMinorAlleleFraction;
    }

    /** @return the contig of the segment interval */
    @Override
    public String getContig() {
        return this.interval.getContig();
    }

    /** @return the start of the segment interval */
    @Override
    public int getStart() {
        return this.interval.getStart();
    }

    /** @return the end of the segment interval */
    @Override
    public int getEnd() {
        return this.interval.getEnd();
    }

    /** @return the segment interval */
    public SimpleInterval getInterval() {
        return this.interval;
    }

    /** @return the number of copy-ratio points given at construction */
    public int getNumPointsCopyRatio() {
        return this.numPointsCopyRatio;
    }

    /** @return the number of allele-fraction points given at construction */
    public int getNumPointsAlleleFraction() {
        return this.numPointsAlleleFraction;
    }

    /** @return the posterior summary for log2 copy ratio */
    public SimplePosteriorSummary getLog2CopyRatioSimplePosteriorSummary() {
        return this.log2CopyRatioSimplePosteriorSummary;
    }

    /** @return the posterior summary for minor-allele fraction */
    public SimplePosteriorSummary getMinorAlleleFractionSimplePosteriorSummary() {
        return this.minorAlleleFractionSimplePosteriorSummary;
    }

    /**
     * Immutable triple of the 10th, 50th and 90th deciles of a posterior.
     */
    public static final class SimplePosteriorSummary {
        private final double decile10;
        private final double decile50;
        private final double decile90;

        /**
         * Stores the three decile values as given, without validation.
         */
        public SimplePosteriorSummary(final double decile10,
                                      final double decile50,
                                      final double decile90) {
            this.decile10 = decile10;
            this.decile50 = decile50;
            this.decile90 = decile90;
        }

        /**
         * Computes the three deciles from samples using a {@link DecileCollection}.
         *
         * @param samples samples handed to {@link DecileCollection}
         */
        public SimplePosteriorSummary(final List<Double> samples) {
            final DecileCollection decileCollection = new DecileCollection(samples);
            this.decile10 = decileCollection.get(Decile.DECILE_10);
            this.decile50 = decileCollection.get(Decile.DECILE_50);
            this.decile90 = decileCollection.get(Decile.DECILE_90);
        }

        /** @return the 10th decile */
        public double getDecile10() {
            return this.decile10;
        }

        /** @return the 50th decile */
        public double getDecile50() {
            return this.decile50;
        }

        /** @return the 90th decile */
        public double getDecile90() {
            return this.decile90;
        }

        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder("SimplePosteriorSummary{");
            text.append("decile10=").append(decile10);
            text.append(", decile50=").append(decile50);
            text.append(", decile90=").append(decile90);
            text.append('}');
            return text.toString();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package org.broadinstitute.hellbender.tools.copynumber.multidimensional.model;

import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SampleLocatableCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleMetadata;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection;

import java.io.File;
import java.util.List;
import java.util.Locale;
import java.util.function.BiConsumer;
import java.util.function.Function;

/**
* @author Samuel Lee &lt;slee@broadinstitute.org&gt;
*/
/**
 * A sample-level collection of {@link ModeledSegment} records, with an eleven-column table layout:
 * contig, start, end, the two point counts, and the 10th/50th/90th posterior deciles for
 * log2 copy ratio and minor-allele fraction.
 */
public final class ModeledSegmentCollection extends SampleLocatableCollection<ModeledSegment> {
    private static final String DOUBLE_FORMAT = "%6.6f"; //TODO replace this with MultidimensionalModeller.DOUBLE_FORMAT from sl_wgs_acnv branch

    /** Table columns, in the order in which they are written and read. */
    enum ModeledSegmentTableColumn {
        CONTIG,
        START,
        END,
        NUM_POINTS_COPY_RATIO,
        NUM_POINTS_ALLELE_FRACTION,
        LOG2_COPY_RATIO_POSTERIOR_10,
        LOG2_COPY_RATIO_POSTERIOR_50,
        LOG2_COPY_RATIO_POSTERIOR_90,
        MINOR_ALLELE_FRACTION_POSTERIOR_10,
        MINOR_ALLELE_FRACTION_POSTERIOR_50,
        MINOR_ALLELE_FRACTION_POSTERIOR_90;

        static final TableColumnCollection COLUMNS = new TableColumnCollection((Object[]) values());
    }

    /** Builds a {@link ModeledSegment} from the eleven columns of a data line. */
    private static final Function<DataLine, ModeledSegment> MODELED_SEGMENT_RECORD_FROM_DATA_LINE_DECODER = dataLine -> {
        final String contig = dataLine.get(ModeledSegmentTableColumn.CONTIG);
        final int start = dataLine.getInt(ModeledSegmentTableColumn.START);
        final int end = dataLine.getInt(ModeledSegmentTableColumn.END);
        final int numPointsCopyRatio = dataLine.getInt(ModeledSegmentTableColumn.NUM_POINTS_COPY_RATIO);
        final int numPointsAlleleFraction = dataLine.getInt(ModeledSegmentTableColumn.NUM_POINTS_ALLELE_FRACTION);
        final double log2CopyRatioPosterior10 = dataLine.getDouble(ModeledSegmentTableColumn.LOG2_COPY_RATIO_POSTERIOR_10);
        final double log2CopyRatioPosterior50 = dataLine.getDouble(ModeledSegmentTableColumn.LOG2_COPY_RATIO_POSTERIOR_50);
        final double log2CopyRatioPosterior90 = dataLine.getDouble(ModeledSegmentTableColumn.LOG2_COPY_RATIO_POSTERIOR_90);
        final double minorAlleleFractionPosterior10 = dataLine.getDouble(ModeledSegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_10);
        final double minorAlleleFractionPosterior50 = dataLine.getDouble(ModeledSegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_50);
        final double minorAlleleFractionPosterior90 = dataLine.getDouble(ModeledSegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_90);
        final SimpleInterval interval = new SimpleInterval(contig, start, end);
        return new ModeledSegment(interval, numPointsCopyRatio, numPointsAlleleFraction,
                new ModeledSegment.SimplePosteriorSummary(log2CopyRatioPosterior10, log2CopyRatioPosterior50, log2CopyRatioPosterior90),
                new ModeledSegment.SimplePosteriorSummary(minorAlleleFractionPosterior10, minorAlleleFractionPosterior50, minorAlleleFractionPosterior90));
    };

    /** Appends the eleven columns of a {@link ModeledSegment} to a data line, in column order. */
    private static final BiConsumer<ModeledSegment, DataLine> MODELED_SEGMENT_RECORD_TO_DATA_LINE_ENCODER = (modeledSegment, dataLine) -> {
        final ModeledSegment.SimplePosteriorSummary log2CopyRatio = modeledSegment.getLog2CopyRatioSimplePosteriorSummary();
        final ModeledSegment.SimplePosteriorSummary minorAlleleFraction = modeledSegment.getMinorAlleleFractionSimplePosteriorSummary();
        dataLine.append(modeledSegment.getContig())
                .append(modeledSegment.getStart())
                .append(modeledSegment.getEnd())
                .append(modeledSegment.getNumPointsCopyRatio())
                .append(modeledSegment.getNumPointsAlleleFraction())
                .append(formatDouble(log2CopyRatio.getDecile10()))
                .append(formatDouble(log2CopyRatio.getDecile50()))
                .append(formatDouble(log2CopyRatio.getDecile90()))
                .append(formatDouble(minorAlleleFraction.getDecile10()))
                .append(formatDouble(minorAlleleFraction.getDecile50()))
                .append(formatDouble(minorAlleleFraction.getDecile90()));
    };

    /**
     * @param inputFile file handed to the superclass together with the column layout and decoder/encoder
     */
    public ModeledSegmentCollection(final File inputFile) {
        super(inputFile, ModeledSegmentTableColumn.COLUMNS, MODELED_SEGMENT_RECORD_FROM_DATA_LINE_DECODER, MODELED_SEGMENT_RECORD_TO_DATA_LINE_ENCODER);
    }

    /**
     * @param sampleMetadata    metadata handed to the superclass
     * @param modeledSegments   records handed to the superclass
     */
    public ModeledSegmentCollection(final SampleMetadata sampleMetadata,
                                    final List<ModeledSegment> modeledSegments) {
        super(sampleMetadata, modeledSegments, ModeledSegmentTableColumn.COLUMNS, MODELED_SEGMENT_RECORD_FROM_DATA_LINE_DECODER, MODELED_SEGMENT_RECORD_TO_DATA_LINE_ENCODER);
    }

    /**
     * Formats a double with {@link #DOUBLE_FORMAT} using {@link Locale#ROOT}.
     *
     * <p>Without an explicit locale, {@link String#format(String, Object...)} uses the default format
     * locale, which may use a comma as the decimal separator. Pinning the locale keeps the written
     * values independent of the machine's locale settings.
     * NOTE(review): the decoder's {@code getDouble} presumably expects a '.' separator -- confirm.</p>
     */
    private static String formatDouble(final double value) {
        return String.format(Locale.ROOT, DOUBLE_FORMAT, value);
    }
}
Loading

0 comments on commit 066d972

Please sign in to comment.