Skip to content

Commit

Permalink
Merge branch 'dev version 1.0.6'
Browse files Browse the repository at this point in the history
  • Loading branch information
sdentro committed Feb 22, 2017
2 parents 515a492 + b7efb24 commit 30eb359
Show file tree
Hide file tree
Showing 30 changed files with 2,351 additions and 602 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
Package: dpclust3p
Type: Package
Title: Dirichlet Process Clustering Pre-Processing Package
Version: 1.0.5
Date: 2016-08-03
Version: 1.0.6
Author: Stefan Dentro
Maintainer: Stefan Dentro <sd11@sanger.ac.uk>
License: GPL-3
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ export(ascatNgsToBattenberg)
export(ascatToBattenberg)
export(collate_bb_subclones)
export(concat_files)
export(createHistFacetPlot)
export(createPng)
export(dpIn2vcf)
export(dumpCounts.Broad)
Expand All @@ -19,13 +18,15 @@ export(filterForDeaminase)
export(filterForSignature)
export(getTrinucleotideContext)
export(identifyKataegis)
export(meltFacetPlotData)
export(mut_cn_phasing)
export(mut_mut_phasing)
export(mutationBurdenToMutationCopyNumber)
export(mutationCopyNumberToMutationBurden)
export(parseFai)
export(parseIgnore)
export(plot_ccf_hist)
export(plot_mcn_hist)
export(plot_vaf_hist)
export(runGetDirichletProcessInfo)
export(split_by_chrom)
export(vcf2loci)
Expand Down
Empty file modified R/LoadData.R
100755 → 100644
Empty file.
422 changes: 422 additions & 0 deletions R/allelecount.R

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions R/copynumber.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ collate_bb_subclones = function(samplename, bb_subclones_file, gender, outfile)
cndata = cndata[!cndata$chr=="X",]
}

# Remove all segments witout a call
if (any(is.na(cndata$nMin1_A) | is.na(cndata$nMaj1_A))) {
cndata = cndata[!(is.na(cndata$nMin1_A) | is.na(cndata$nMaj1_A)),]
}

# Obtain the total copy number for each segment
totalcn = rep(0, dim(cndata)[1])
for (k in 1:nrow(cndata)) {
Expand Down
Empty file modified R/interconvertMutationBurdens.R
100755 → 100644
Empty file.
522 changes: 25 additions & 497 deletions R/preprocessing.R

Large diffs are not rendered by default.

121 changes: 44 additions & 77 deletions R/qualitycontrol.R
Original file line number Diff line number Diff line change
@@ -1,110 +1,77 @@
##############################################
# QC
##############################################
#' Create a PNG file
#' @param p The figure
#' @param filename Where to save the image
#' @param width Width
#' @param height Height
#' @export
createPng <- function(p, filename, width, height) {
png(filename=filename, width=width, height=height)
print(p)
dev.off()
}

#' @export
#' Convenience function used for creating figures
#' @noRd
meltFacetPlotData <- function(data, subsamplenames) {
d = as.data.frame(data)
colnames(d) = subsamplenames
data.m = reshape2::melt(d)
return(data.m)
}

#' @export
#' Template to create histograms
#' @noRd
createHistFacetPlot <- function(data, title, xlab, ylab, binwidth) {
p = ggplot2::ggplot(data) + ggplot2::aes(x=value, y=..count..) + ggplot2::geom_histogram(binwidth=binwidth, colour="black", fill="gray") + ggplot2::facet_grid(variable ~ .)
p = p + ggplot2::theme_bw(base_size=35) + ggplot2::ggtitle(title) + ggplot2::xlab(xlab) + ggplot2::ylab(ylab)
return(p)
}

#' Template to create box plots
#' @noRd
createBoxFacetPlot <- function(data, title, xlab, ylab) {
p = ggplot2::ggplot(data) + ggplot2::aes(x=variable, y=value) + ggplot2::geom_boxplot() + ggplot2::facet_grid(subsample ~ .)
p = p + ggplot2::theme_bw() + ggplot2::ggtitle(title) + ggplot2::xlab(xlab) + ggplot2::ylab(ylab)
return(p)
}

createQCDocument <- function(res, samplename, subsamplenames, outpath, cellularity) {
p = createHistFacetPlot(meltFacetPlotData(res$totalCopyNumber, subsamplenames), paste(samplename, "totalCopyNumber"), "Copynumber", "Count", binwidth=1)
createPng(p, paste(outpath, samplename, "_totalCopyNumber.png", sep=""), width=1500, height=500*length(subsamplenames))

p = createHistFacetPlot(meltFacetPlotData(res$mutation.copy.number, subsamplenames), paste(samplename, "mutation.copy.number"), "mutation.copy.number", "Count", binwidth=0.1)
#' Plot mutation copy number histogram
#'
#' @param data A DPClust input table
#' @param samplename Name of the sample
#' @param outdir Directory where the figure is to be stored
#' @author sd11
#' @export
plot_mcn_hist = function(data, samplename, outdir) {
p = createHistFacetPlot(meltFacetPlotData(data$mutation.copy.number, samplename), paste(samplename, "mutation.copy.number"), "mutation.copy.number", "Count", binwidth=0.1)
p = p + ggplot2::xlim(0,5)
createPng(p, paste(outpath, samplename, "_mutation.copy.number.png", sep=""), width=1500, height=500*length(subsamplenames))

p = createHistFacetPlot(meltFacetPlotData(res$copyNumberAdjustment, subsamplenames), paste(samplename, "copyNumberAdjustment"), "copyNumberAdjustment", "Count (log10)", binwidth=1)
p = p + ggplot2::scale_y_log10()
createPng(p, paste(outpath, samplename, "_copyNumberAdjustment.png", sep=""), width=1500, height=500*length(subsamplenames))

p = createHistFacetPlot(meltFacetPlotData(res$mutCount/(res$mutCount+res$WTCount), subsamplenames), paste(samplename, "alleleFrequency"), "Allele Frequency", "Count", binwidth=0.01)
createPng(p, paste(outpath, samplename, "_alleleFrequency.png", sep=""), width=1500, height=500*length(subsamplenames))

p = createHistFacetPlot(meltFacetPlotData(res$kappa, subsamplenames), paste(samplename, "kappa"), "Kappa", "Count", binwidth=0.01)
createPng(p, paste(outpath, samplename, "_kappa.png", sep=""), width=1500, height=500*length(subsamplenames))
# p = createHistFacetPlot(meltFacetPlotData(res$subclonal.fraction, subsamplenames), paste(samplename, "subclonal.fraction"), "Subclonal fraction", "Count", binwidth=0.01)
# createPng(p, paste(outpath, samplename, "_subclonal.fraction.png", sep=""), width=1500, height=500*length(subsamplenames))

p = createHistFacetPlot(meltFacetPlotData((res$mutCount+res$WTCount), subsamplenames), paste(samplename, "depth"), "Depth", "Count", binwidth=5)
createPng(p, paste(outpath, samplename, "_depth.png", sep=""), width=1500, height=500*length(subsamplenames))

# fractionOfCells = res$mutation.copy.number / res$copyNumberAdjustment
# meltFacetPlotData(fractionOfCells, subsamplenames)
p = createHistFacetPlot(meltFacetPlotData(res$subclonal.fraction, subsamplenames), paste(samplename, "Fraction Of Cells"), "Fraction of Cells", "Count", binwidth=0.05)
p = p + ggplot2::geom_vline(xintercept=0.5, colour="red", linetype="longdash", size=2) + ggplot2::geom_vline(xintercept=1.5, colour="red", linetype="longdash", size=2) + ggplot2::xlim(0,3) #scale_y_log10()
createPng(p, paste(outpath, samplename, "_fractionOfCells.png", sep=""), width=1500, height=500*length(subsamplenames))

# manualMutCopyNum = mutationBurdenToMutationCopyNumber(res$mutCount/(res$mutCount+res$WTCount),res$totalCopyNumber ,cellularity)
# p = createHistFacetPlot(meltFacetPlotData(manualMutCopyNum, subsamplenames), paste(samplename, "manualMutCopyNum"), "Mutation copy number", "Count", binwidth=0.1)
# createPng(p, paste(outpath, samplename, "_manualMutCopyNum.png", sep=""), width=500, height=250*length(subsamplenames))
#
# manualFractionOfCells = mutationBurdenToMutationCopyNumber(res$mutCount/(res$mutCount+res$WTCount),res$totalCopyNumber ,cellularity) / res$copyNumberAdjustment
# p = createHistFacetPlot(meltFacetPlotData(manualFractionOfCells, subsamplenames), paste(samplename, "manualFractionOfCells"), "Fraction of Cells", "Count", binwidth=0.01)
# createPng(p, paste(outpath, samplename, "_manualFractionOfCells.png", sep=""), width=1500, height=500*length(subsamplenames))

# Manually melt the data
d.m = data.frame()
for (i in 1:length(subsamplenames)) {
d.loc = data.frame(subsample=subsamplenames[i],variable=factor(dataset$chromosome[,i]), value=dataset$subclonal.fraction[,i])
d.m = rbind(d.m, d.loc)
}
p = createBoxFacetPlot(d.m, paste(samplename, "subclonal fraction per chrom"), "Chromosome", "Subclonal fraction")
createPng(p, paste(outpath, samplename, "_subclonalFractionPerChromosome.png", sep=""), width=1500, height=500*length(subsamplenames))

# Manually melt the data
d = as.data.frame(dataset$chromosome)
colnames(d) = subsamplenames
d.m = data.frame()
for (i in 1:ncol(d)) {
d.loc = d[dataset$subclonal.fraction > 1.5,i]
if(length(d.loc) > 0) {
d.m = rbind(d.m, data.frame(variable=colnames(d)[i], value=factor(d.loc)))
}
}

if (nrow(d.m) > 0) {
p = createHistFacetPlot(d.m, paste(samplename, "subclonal fraction > 1.5"), "Chromosome", "Count", binwidth=1)
createPng(p, paste(outpath, samplename, "_large.subclonal.fraction.by.chrom.png", sep=""), width=1500, height=500*length(subsamplenames))
}
createPng(p, file.path(outpath, paste(samplename, "_mutation.copy.number.png", sep="")), width=1500, height=500)
}

#'
#' Run the QC on specified data
#'
runQc = function(infile, datpath, outpath) {
sample2purity = read.table(infile, header=T, stringsAsFactors=F)
for (samplename in unique(sample2purity$sample)) {
print(samplename)
datafiles = sample2purity[sample2purity$sample==samplename,]$datafile
subsamples = sample2purity[sample2purity$sample==samplename,]$subsample
cellularity = sample2purity[sample2purity$sample==samplename,]$cellularity
if (file.exists(paste(datpath,datafiles, sep=""))) {
dataset = load.data(datpath,"",datafiles, cellularity=cellularity, Chromosome="chr", position="end", WT.count="WT.count", mut.count="mut.count", subclonal.CN="subclonal.CN", no.chrs.bearing.mut="no.chrs.bearing.mut", mutation.copy.number="mutation.copy.number", subclonal.fraction="subclonal.fraction", data_file_suffix="")
createQCDocument(dataset, samplename, subsamples, outpath, cellularity)
}
}
#' Plot cancer cell fraction histogram
#'
#' @param data A DPClust input table
#' @param samplename Name of the sample
#' @param outdir Directory where the figure is to be stored
#' @author sd11
#' @export
plot_ccf_hist = function(data, samplename, outdir) {
p = createHistFacetPlot(meltFacetPlotData(data$subclonal.fraction, samplename), paste(samplename, "Fraction Tumour Cells"), "Fraction Tumour Cells", "Count", binwidth=0.05)
p = p + ggplot2::xlim(0,1.5)
createPng(p, file.path(outpath, paste(samplename, "_fractionOfCells.png", sep="")), width=1500, height=500)
}

#' Plot allele frequency histogram
#'
#' @param data A DPClust input table
#' @param samplename Name of the sample
#' @param outdir Directory where the figure is to be stored
#' @author sd11
#' @export
plot_vaf_hist = function(data, samplename, outdir) {
p = createHistFacetPlot(meltFacetPlotData(data$mutCount/(data$mutCount+data$WTCount), samplename), paste(samplename, "alleleFrequency"), "Allele Frequency", "Count", binwidth=0.01)
createPng(p, file.path(outpath, paste(samplename, "_alleleFrequency.png", sep="")), width=1500, height=500)
}
72 changes: 72 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#' Concatenate split files
#'
#' Convenience function to concatenate a series of files specified in a file of file names.
#' This function assumes all files have the same layout.
#' @param fofn A file of file names to be concatenated
#' @param inputdir Full path to where the input files are stored
#' @param outfile Full path to where the output should be written
#' @param haveHeader Boolean that specifies whether the input files have a header
#' @author sd11
#' @export
concat_files = function(fofn, inputdir, outfile, haveHeader) {

inputdir = paste(inputdir,"/", sep="")
list_of_files = read.table(fofn, stringsAsFactors=F, header=F)[,1]

output = data.frame()
for (infile in list_of_files) {
infile = paste(inputdir, infile, sep="")
# Check if file is there and it contains data
if (file.exists(infile) & file.info(infile)$size != 0) {
dat = read.delim(infile, header=haveHeader, quote=NULL, stringsAsFactors=F)
output = rbind(output, dat)
}
}

write.table(output, file=outfile, col.names=haveHeader, row.names=F, sep="\t", quote=F)
}

#' Split a file per chromosome
#'
#' Convenience function to split an input file per chromosome. All it requires is that
#' the infile has as first column chromosome specification. The output files will be named
#' outdir/prefixCHROMNUMBERpostfix
#' @param infile The file to be split
#' @param prefix Prefix of the output file
#' @param postfix Postfix of the output file
#' @param outdir Directory where the output files are to be written
#' @param chrom_file A simple list of chromosomes to be considered
#' @author sd11
#' @export
split_by_chrom = function(infile, prefix, postfix, outdir, chrom_file) {
outdir = paste(outdir, "/", sep="")

# Check if there are lines in the file, otherwise it will crash this script
if (file.info(infile)$size == 0) {
print("No lines in loci file")
q(save="no")
}

loci = read.delim(infile, stringsAsFactors=F, header=F)

chroms = read.delim(chrom_file, stringsAsFactors=F, header=F)

for (i in 1:nrow(chroms)) {
selection = loci[loci[,1]==chroms[i,1],]
chrom_id = chroms[i,2]
write.table(selection, file=paste(outdir, prefix, chrom_id, postfix, sep=""), quote=F, row.names=F, col.names=F, sep="\t")
}
}

#' Calculate power to call subclones.
#'
#' This function calculates the
#' average number of reads per clonal chromosome copy.
#' @param purity The sample purity
#' @param ploidy The tumour ploidy
#' @param coverage The tumour sample coverage
#' @return The average reads per chromosome copy, a.k.a. power
#' @author sd11
calc_power = function(purity, ploidy, coverage) {
return(round((purity) / (purity*ploidy + (1-purity)*2) * coverage, 3))
}
Loading

0 comments on commit 30eb359

Please sign in to comment.