-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
2,351 additions
and
602 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,110 +1,77 @@ | ||
############################################## | ||
# QC | ||
############################################## | ||
#' Create a PNG file
#'
#' Opens a PNG graphics device, prints the figure to it and closes the device
#' again. The device is also closed when printing the figure fails, so a
#' failing plot does not leave a dangling graphics device behind.
#' @param p The figure
#' @param filename Where to save the image
#' @param width Width, passed on to \code{png}
#' @param height Height, passed on to \code{png}
#' @return The value returned by \code{dev.off}
#' @export
createPng <- function(p, filename, width, height) {
  grDevices::png(filename = filename, width = width, height = height)
  # Close the device if print(p) raises an error; the flag prevents a second
  # dev.off() on the normal path, where the explicit call below is the result.
  device_open <- TRUE
  on.exit(if (device_open) grDevices::dev.off(), add = TRUE)
  print(p)
  device_open <- FALSE
  grDevices::dev.off()
}
|
||
#' Convenience function used for creating figures
#'
#' Converts \code{data} to a data.frame, names its columns after
#' \code{subsamplenames} and melts the result with \code{reshape2::melt}.
#' @param data Data to convert with \code{as.data.frame}
#' @param subsamplenames Column names to assign before melting
#' @return The melted data.frame
#' @noRd
#' @export
meltFacetPlotData <- function(data, subsamplenames) {
  wide <- as.data.frame(data)
  colnames(wide) <- subsamplenames
  reshape2::melt(wide)
}
|
||
#' Template to create histograms
#'
#' Builds a histogram of the \code{value} column of \code{data}, with one
#' facet row per level of the \code{variable} column.
#' @param data A data.frame with columns \code{value} and \code{variable}
#' @param title Plot title
#' @param xlab Label of the x-axis
#' @param ylab Label of the y-axis
#' @param binwidth Bin width passed to \code{ggplot2::geom_histogram}
#' @return The ggplot object
#' @noRd
#' @export
createHistFacetPlot <- function(data, title, xlab, ylab, binwidth) {
  ggplot2::ggplot(data) +
    ggplot2::aes(x = value, y = ..count..) +
    ggplot2::geom_histogram(binwidth = binwidth, colour = "black", fill = "gray") +
    ggplot2::facet_grid(variable ~ .) +
    ggplot2::theme_bw(base_size = 35) +
    ggplot2::ggtitle(title) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab)
}
|
||
#' Template to create box plots
#'
#' Draws one box per level of \code{variable} showing the distribution of
#' \code{value}, with one facet row per level of \code{subsample}.
#' @noRd
createBoxFacetPlot <- function(data, title, xlab, ylab) {
  boxes <- ggplot2::ggplot(data) +
    ggplot2::aes(x = variable, y = value) +
    ggplot2::geom_boxplot() +
    ggplot2::facet_grid(subsample ~ .)
  labelled <- boxes +
    ggplot2::theme_bw() +
    ggplot2::ggtitle(title) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab)
  return(labelled)
}
|
||
createQCDocument <- function(res, samplename, subsamplenames, outpath, cellularity) { | ||
p = createHistFacetPlot(meltFacetPlotData(res$totalCopyNumber, subsamplenames), paste(samplename, "totalCopyNumber"), "Copynumber", "Count", binwidth=1) | ||
createPng(p, paste(outpath, samplename, "_totalCopyNumber.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
p = createHistFacetPlot(meltFacetPlotData(res$mutation.copy.number, subsamplenames), paste(samplename, "mutation.copy.number"), "mutation.copy.number", "Count", binwidth=0.1) | ||
#' Plot mutation copy number histogram | ||
#' | ||
#' @param data A DPClust input table | ||
#' @param samplename Name of the sample | ||
#' @param outdir Directory where the figure is to be stored | ||
#' @author sd11 | ||
#' @export | ||
plot_mcn_hist = function(data, samplename, outdir) { | ||
p = createHistFacetPlot(meltFacetPlotData(data$mutation.copy.number, samplename), paste(samplename, "mutation.copy.number"), "mutation.copy.number", "Count", binwidth=0.1) | ||
p = p + ggplot2::xlim(0,5) | ||
createPng(p, paste(outpath, samplename, "_mutation.copy.number.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
p = createHistFacetPlot(meltFacetPlotData(res$copyNumberAdjustment, subsamplenames), paste(samplename, "copyNumberAdjustment"), "copyNumberAdjustment", "Count (log10)", binwidth=1) | ||
p = p + ggplot2::scale_y_log10() | ||
createPng(p, paste(outpath, samplename, "_copyNumberAdjustment.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
p = createHistFacetPlot(meltFacetPlotData(res$mutCount/(res$mutCount+res$WTCount), subsamplenames), paste(samplename, "alleleFrequency"), "Allele Frequency", "Count", binwidth=0.01) | ||
createPng(p, paste(outpath, samplename, "_alleleFrequency.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
p = createHistFacetPlot(meltFacetPlotData(res$kappa, subsamplenames), paste(samplename, "kappa"), "Kappa", "Count", binwidth=0.01) | ||
createPng(p, paste(outpath, samplename, "_kappa.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
# p = createHistFacetPlot(meltFacetPlotData(res$subclonal.fraction, subsamplenames), paste(samplename, "subclonal.fraction"), "Subclonal fraction", "Count", binwidth=0.01) | ||
# createPng(p, paste(outpath, samplename, "_subclonal.fraction.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
p = createHistFacetPlot(meltFacetPlotData((res$mutCount+res$WTCount), subsamplenames), paste(samplename, "depth"), "Depth", "Count", binwidth=5) | ||
createPng(p, paste(outpath, samplename, "_depth.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
# fractionOfCells = res$mutation.copy.number / res$copyNumberAdjustment | ||
# meltFacetPlotData(fractionOfCells, subsamplenames) | ||
p = createHistFacetPlot(meltFacetPlotData(res$subclonal.fraction, subsamplenames), paste(samplename, "Fraction Of Cells"), "Fraction of Cells", "Count", binwidth=0.05) | ||
p = p + ggplot2::geom_vline(xintercept=0.5, colour="red", linetype="longdash", size=2) + ggplot2::geom_vline(xintercept=1.5, colour="red", linetype="longdash", size=2) + ggplot2::xlim(0,3) #scale_y_log10() | ||
createPng(p, paste(outpath, samplename, "_fractionOfCells.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
# manualMutCopyNum = mutationBurdenToMutationCopyNumber(res$mutCount/(res$mutCount+res$WTCount),res$totalCopyNumber ,cellularity) | ||
# p = createHistFacetPlot(meltFacetPlotData(manualMutCopyNum, subsamplenames), paste(samplename, "manualMutCopyNum"), "Mutation copy number", "Count", binwidth=0.1) | ||
# createPng(p, paste(outpath, samplename, "_manualMutCopyNum.png", sep=""), width=500, height=250*length(subsamplenames)) | ||
# | ||
# manualFractionOfCells = mutationBurdenToMutationCopyNumber(res$mutCount/(res$mutCount+res$WTCount),res$totalCopyNumber ,cellularity) / res$copyNumberAdjustment | ||
# p = createHistFacetPlot(meltFacetPlotData(manualFractionOfCells, subsamplenames), paste(samplename, "manualFractionOfCells"), "Fraction of Cells", "Count", binwidth=0.01) | ||
# createPng(p, paste(outpath, samplename, "_manualFractionOfCells.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
# Manually melt the data | ||
d.m = data.frame() | ||
for (i in 1:length(subsamplenames)) { | ||
d.loc = data.frame(subsample=subsamplenames[i],variable=factor(dataset$chromosome[,i]), value=dataset$subclonal.fraction[,i]) | ||
d.m = rbind(d.m, d.loc) | ||
} | ||
p = createBoxFacetPlot(d.m, paste(samplename, "subclonal fraction per chrom"), "Chromosome", "Subclonal fraction") | ||
createPng(p, paste(outpath, samplename, "_subclonalFractionPerChromosome.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
|
||
# Manually melt the data | ||
d = as.data.frame(dataset$chromosome) | ||
colnames(d) = subsamplenames | ||
d.m = data.frame() | ||
for (i in 1:ncol(d)) { | ||
d.loc = d[dataset$subclonal.fraction > 1.5,i] | ||
if(length(d.loc) > 0) { | ||
d.m = rbind(d.m, data.frame(variable=colnames(d)[i], value=factor(d.loc))) | ||
} | ||
} | ||
|
||
if (nrow(d.m) > 0) { | ||
p = createHistFacetPlot(d.m, paste(samplename, "subclonal fraction > 1.5"), "Chromosome", "Count", binwidth=1) | ||
createPng(p, paste(outpath, samplename, "_large.subclonal.fraction.by.chrom.png", sep=""), width=1500, height=500*length(subsamplenames)) | ||
} | ||
createPng(p, file.path(outpath, paste(samplename, "_mutation.copy.number.png", sep="")), width=1500, height=500) | ||
} | ||
|
||
#' | ||
#' Run the QC on specified data | ||
#' | ||
runQc = function(infile, datpath, outpath) { | ||
sample2purity = read.table(infile, header=T, stringsAsFactors=F) | ||
for (samplename in unique(sample2purity$sample)) { | ||
print(samplename) | ||
datafiles = sample2purity[sample2purity$sample==samplename,]$datafile | ||
subsamples = sample2purity[sample2purity$sample==samplename,]$subsample | ||
cellularity = sample2purity[sample2purity$sample==samplename,]$cellularity | ||
if (file.exists(paste(datpath,datafiles, sep=""))) { | ||
dataset = load.data(datpath,"",datafiles, cellularity=cellularity, Chromosome="chr", position="end", WT.count="WT.count", mut.count="mut.count", subclonal.CN="subclonal.CN", no.chrs.bearing.mut="no.chrs.bearing.mut", mutation.copy.number="mutation.copy.number", subclonal.fraction="subclonal.fraction", data_file_suffix="") | ||
createQCDocument(dataset, samplename, subsamples, outpath, cellularity) | ||
} | ||
} | ||
#' Plot cancer cell fraction histogram
#'
#' Plots a histogram of \code{data$subclonal.fraction}, limited to the range
#' 0 to 1.5 on the x-axis, and saves it as
#' \code{<outdir>/<samplename>_fractionOfCells.png}.
#' @param data A DPClust input table; must provide \code{subclonal.fraction}
#' @param samplename Name of the sample
#' @param outdir Directory where the figure is to be stored
#' @author sd11
#' @export
plot_ccf_hist = function(data, samplename, outdir) {
  ccf = meltFacetPlotData(data$subclonal.fraction, samplename)
  p = createHistFacetPlot(ccf, paste(samplename, "Fraction Tumour Cells"), "Fraction Tumour Cells", "Count", binwidth=0.05)
  p = p + ggplot2::xlim(0,1.5)
  # Write into the directory given by the outdir parameter
  createPng(p, file.path(outdir, paste(samplename, "_fractionOfCells.png", sep="")), width=1500, height=500)
}
|
||
#' Plot allele frequency histogram
#'
#' Plots a histogram of \code{mutCount / (mutCount + WTCount)} and saves it as
#' \code{<outdir>/<samplename>_alleleFrequency.png}.
#' @param data A DPClust input table; must provide \code{mutCount} and \code{WTCount}
#' @param samplename Name of the sample
#' @param outdir Directory where the figure is to be stored
#' @author sd11
#' @export
plot_vaf_hist = function(data, samplename, outdir) {
  vaf = data$mutCount / (data$mutCount + data$WTCount)
  p = createHistFacetPlot(meltFacetPlotData(vaf, samplename), paste(samplename, "alleleFrequency"), "Allele Frequency", "Count", binwidth=0.01)
  # Write into the directory given by the outdir parameter
  createPng(p, file.path(outdir, paste(samplename, "_alleleFrequency.png", sep="")), width=1500, height=500)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#' Concatenate split files
#'
#' Convenience function to concatenate a series of files specified in a file of file names.
#' This function assumes all files have the same layout. Files that do not exist
#' or that are empty are skipped. When none of the files contains data an empty
#' table is written.
#' @param fofn A file of file names to be concatenated
#' @param inputdir Full path to where the input files are stored
#' @param outfile Full path to where the output should be written
#' @param haveHeader Boolean that specifies whether the input files have a header
#' @author sd11
#' @export
concat_files = function(fofn, inputdir, outfile, haveHeader) {
  list_of_files = read.table(fofn, stringsAsFactors=FALSE, header=FALSE)[,1]

  # Read a single input file; returns NULL when it is missing or empty
  read_if_present = function(infile) {
    infile = file.path(inputdir, infile)
    # && short-circuits, so file.info is only consulted for existing files
    if (file.exists(infile) && file.info(infile)$size != 0) {
      read.delim(infile, header=haveHeader, quote=NULL, stringsAsFactors=FALSE)
    } else {
      NULL
    }
  }

  chunks = Filter(Negate(is.null), lapply(list_of_files, read_if_present))

  # Bind all tables in a single call instead of growing the output per file
  output = if (length(chunks) > 0) do.call(rbind, chunks) else data.frame()

  write.table(output, file=outfile, col.names=haveHeader, row.names=FALSE, sep="\t", quote=FALSE)
}
|
||
#' Split a file per chromosome
#'
#' Convenience function to split an input file per chromosome. All it requires is that
#' the infile has as first column chromosome specification. The output files will be named
#' outdir/prefixCHROMNUMBERpostfix
#'
#' Rows without a chromosome value (NA) are not written to any output file.
#' Note that this function quits the R session when \code{infile} is empty.
#' @param infile The file to be split
#' @param prefix Prefix of the output file
#' @param postfix Postfix of the output file
#' @param outdir Directory where the output files are to be written
#' @param chrom_file A simple list of chromosomes to be considered. The first column
#' is matched against the first column of \code{infile}, the second column is used
#' in the output file name
#' @author sd11
#' @export
split_by_chrom = function(infile, prefix, postfix, outdir, chrom_file) {
  # Check if there are lines in the file, otherwise it will crash this script
  if (file.info(infile)$size == 0) {
    print("No lines in loci file")
    q(save="no")
  }

  loci = read.delim(infile, stringsAsFactors=FALSE, header=FALSE)
  chroms = read.delim(chrom_file, stringsAsFactors=FALSE, header=FALSE)

  for (i in seq_len(nrow(chroms))) {
    # which() drops NA comparisons, which would otherwise add all-NA rows
    keep = which(loci[,1] == chroms[i,1])
    selection = loci[keep, , drop=FALSE]
    chrom_id = chroms[i,2]
    target = file.path(outdir, paste0(prefix, chrom_id, postfix))
    write.table(selection, file=target, quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")
  }
}
|
||
#' Calculate power to call subclones.
#'
#' Computes the average number of reads per clonal chromosome copy,
#' rounded to three decimals.
#' @param purity The sample purity
#' @param ploidy The tumour ploidy
#' @param coverage The tumour sample coverage
#' @return The average reads per chromosome copy, a.k.a. power
#' @author sd11
calc_power = function(purity, ploidy, coverage) {
  # Denominator: purity-weighted tumour ploidy plus the remaining fraction times 2
  weighted_copies = purity*ploidy + (1-purity)*2
  reads_per_copy = (purity) / weighted_copies * coverage
  round(reads_per_copy, 3)
}
Oops, something went wrong.