.Rhistory

}
indices <- which(gene.average < expr.cutoff)
if(length(indices) >0) {
object@data <- object.raw.data[-1 * indices, , drop=FALSE]
object.data <- object@data
object@annotation.filt <- object@annotation[-1 * indices, , drop=FALSE]
} else {
object@data <- object.raw.data
object.data <- object@data
object@annotation.filt <- object@annotation
}
if(is.null( object.data) || nrow( object.data) < 1 || ncol( object.data) < 1){  stop("Error, data has no rows or columns") }
if(matrix.type == "raw") {
cs <- colSums(object.data)
object.data <- sweep(object.data, STATS=cs, MARGIN=2, FUN="/")
normalize_factor <-  median(cs)
object.data  <- object.data * normalize_factor
object.data <- log2(object.data + 1)
#centered.data <-  t(scale(t(object.data)))
}
centered.data <-  t(apply(t(object.data), 2, function(y) (y - mean(y)) / sd(y) ^ as.logical(sd(y))))
object@centered.data <- AverageReference(data = centered.data, ref_ids = object@control.sample.ids)
lower_bound <- mean(apply(object@centered.data, 2,
function(x) quantile(x, na.rm=TRUE)[[1]]))
upper_bound <- mean(apply(object@centered.data, 2,
function(x) quantile(x, na.rm=TRUE)[[5]]))
threshold = mean(abs(c(lower_bound, upper_bound)))
object@centered.data[object@centered.data > threshold] <- threshold
object@centered.data[object@centered.data < (-1 * threshold)] <- -1 * threshold
# object <- ProcessData(object)
return(object)
}
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="median",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
object <- ProcessData(object)
if (object@filter=="median")   object <- PerformMedianFilterByChr(object)
if (object@filter=="mean")   object <- PerformMeanFilterByChr(object)
object <- CenterSmooth(object)
object <- ControlNormalize(object)
CenterSmooth
ControlNormalize
#' @title CreateCasperObject
#'
#' @param raw.data  the matrix of genes (rows) vs. cells (columns) containing the raw counts
#'
#' @param annotation data.frame containing positions of each gene along each chromosome in the genome
#'
#' @param control.sample.ids  vector containing the  reference (normal) cell (sample) names
#'
#' @param cytoband cytoband information downloaded from UCSC hg19: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/cytoBand.txt.gz hg38:http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz
#'
#' @param loh.name.mapping  contains the cell (sample) name and the matching baf signal sample name
#'
#' @param cnv.scale  maximum expression scale
#'
#' @param loh.scale  maximum baf scale
#'
#' @param method analysis type: itereative  or fixed (default: iterative)
#'
#' @param loh The original baf signal
#'
#' @param sequencing.type sequencing.type sequencing type: bulk or single-cell
#'
#' @param expr.cutoff expression cutoff for lowly expressed genes
#'
#' @param log.transformed indicates if the data log2 transformed or not. (default:TRUE)
#'
#' @param window.length window length used for median filtering (default: 50)
#'
#' @param length.iterations increase in window length at each scale iteration (default: 50)
#'
#' @param vis.bound threshold for control normalized data for better visualization (default: 2)
#'
#' @param genomeVersion genomeVersion: hg19 or hg38 (default: hg19)
#'
#' @description Creation of a casper object.
#'
#' @return casper
#'
#' @export
#'
CreateCasperObject <- function(raw.data, annotation, control.sample.ids, cytoband, loh.name.mapping, cnv.scale, loh.scale,
method, loh, project = "casperProject", matrix.type="raw", sequencing.type, expr.cutoff = 0.1, log.transformed = TRUE,
window.length = 50, length.iterations = 50,  genomeVersion = "hg19", filter= "median",
...) {
object <- new(Class = "casper", raw.data = raw.data, loh = loh, annotation = annotation, sequencing.type = sequencing.type,
control.sample.ids = control.sample.ids, project.name = project,  cytoband = cytoband, loh.name.mapping = loh.name.mapping,
cnv.scale = cnv.scale, loh.scale = loh.scale, matrix.type = matrix.type, method = method, window.length = window.length, length.iterations = length.iterations,
genomeVersion = genomeVersion, filter = filter)
object.raw.data <- object@raw.data
if(matrix.type == "normalized") {
gene.average <- rowMeans(((2^object.raw.data) - 1))
}
if(matrix.type == "raw") {
gene.average <- rowMeans(object.raw.data)
}
indices <- which(gene.average < expr.cutoff)
if(length(indices) >0) {
object@data <- object.raw.data[-1 * indices, , drop=FALSE]
object.data <- object@data
object@annotation.filt <- object@annotation[-1 * indices, , drop=FALSE]
} else {
object@data <- object.raw.data
object.data <- object@data
object@annotation.filt <- object@annotation
}
if(is.null( object.data) || nrow( object.data) < 1 || ncol( object.data) < 1){  stop("Error, data has no rows or columns") }
if(matrix.type == "raw") {
cs <- colSums(object.data)
object.data <- sweep(object.data, STATS=cs, MARGIN=2, FUN="/")
normalize_factor <-  median(cs)
object.data  <- object.data * normalize_factor
object.data <- log2(object.data + 1)
#centered.data <-  t(scale(t(object.data)))
}
centered.data <-  t(apply(t(object.data), 2, function(y) (y - mean(y)) / sd(y) ^ as.logical(sd(y))))
object@centered.data <- AverageReference(data = centered.data, ref_ids = object@control.sample.ids)
lower_bound <- mean(apply(object@centered.data, 2,
function(x) quantile(x, na.rm=TRUE)[[1]]))
upper_bound <- mean(apply(object@centered.data, 2,
function(x) quantile(x, na.rm=TRUE)[[5]]))
threshold = mean(abs(c(lower_bound, upper_bound)))
object@centered.data[object@centered.data > threshold] <- threshold
object@centered.data[object@centered.data < (-1 * threshold)] <- -1 * threshold
object <- ProcessData(object)
return(object)
}
#' @title ProcessData()
#'
#' @description Processing expression signal. Step 1. Recursively iterative median filtering  Step 2. Center Normalization Step 3. Control Normalization
#'
#' @param object casper object
#'
#' @return object
#'
#' @export
#'
#'
ProcessData <- function(object) {
if (object@filter=="median")   object <- PerformMedianFilterByChr(object)
if (object@filter=="mean")   object <- PerformMeanFilterByChr(object)
object <- CenterSmooth(object)
#object <- ControlNormalize(object)
return(object)
}
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="median",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
object <- ControlNormalize(object)
ProcessData <- function(object) {
if (object@filter=="median")   object <- PerformMedianFilterByChr(object)
if (object@filter=="mean")   object <- PerformMeanFilterByChr(object)
object <- CenterSmooth(object)
object <- ControlNormalize(object)
return(object)
}
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="median",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
## plot median filtered gene expression matrix
plotHeatmap(object, fileName="heatmap.png", cnv.scale= 3, cluster_cols = F, cluster_rows = T, show_rownames = T, only_soi = T)
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="mean",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
## plot median filtered gene expression matrix
plotHeatmap(object, fileName="heatmapMean.png", cnv.scale= 3, cluster_cols = F, cluster_rows = T, show_rownames = T, only_soi = T)
plotHeatmap
plotHeatmapobject <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="median",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,expr.cutoff=0.1,matrix.type="normalized", filter="median",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids="REF", cytoband=cytoband)
## plot median filtered gene expression matrix
plotHeatmap(object, fileName="heatmapMean.png", cnv.scale= 3, cluster_cols = F, cluster_rows = T, show_rownames = T, only_soi = T)
library(Seurat)
library(CaSpER)
data("hg19_cytoband")
# expression data is downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110499
counts  <- read.delim("GSE110499_GEO_processed_MM_10X_raw_UMI_count_martix.txt", stringsAsFactor=F, header=T)
rownames(counts) <- counts[, 1]
counts <- counts[, -1]
mm135 <- CreateSeuratObject(counts = counts, project = "mm135", min.cells = 3, min.features = 200)
mm135[["percent.mt"]] <- PercentageFeatureSet(mm135, pattern = "^MT-")
mm135 <- subset(mm135, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
mm135 <- NormalizeData(mm135 , scale.factor = 1e6, normalization.method = "RC")
mm135 <- FindVariableFeatures(mm135, do.plot = T, nfeatures = 1000)
mm135 <- ScaleData(mm135)
mm135 <- RunPCA(mm135, features = VariableFeatures(object = mm135),npcs = 100)
mm135 <- RunTSNE(mm135, dims.use = 1:10)
DimPlot(pbmc, reduction = "tsne")
FeaturePlot(pbmc, features = c("SDC1", "CD38"))
mm135 <- FindNeighbors(mm135, dims = 1:10)
mm135 <- FindClusters(mm135, resolution = 0.5)
DimPlot(pbmc, reduction = "tsne", label=T)
log.ge <- as.matrix(mm135@assays$RNA@data)
control <- names(Idents(mm135) )[Idents(mm135) %in% c(2,7)]
mm <- names(Idents(mm135) )[Idents(mm135) %in% c(0, 1, 3, 4)]
genes <- rownames(log.ge)
annotation <- generateAnnotation(id_type="hgnc_symbol", genes=genes, centromere=centromere, ishg19 = T)
log.ge <- log.ge[match( annotation$Gene,rownames(log.ge)) , ]
rownames(log.ge) <- annotation$Gene
log.ge <- log2(log.ge +1)
load("maf.rda") ## from https://github.com/akdess/CaSpER/blob/master/data/maf.rda
loh<- list()
loh[[1]] <- maf
names(loh) <- "MM135"
loh.name.mapping <- data.frame (loh.name= "MM135" , sample.name=colnames(log.ge))
object <- CreateCasperObject(raw.data=log.ge,loh.name.mapping=loh.name.mapping, sequencing.type="single-cell",
cnv.scale=3, loh.scale=3,
expr.cutoff=0.1, filter="median", matrix.type="normalized",
annotation=annotation, method="iterative", loh=loh,
control.sample.ids=control, cytoband=cytoband)
counts  <- read.delim("GSE110499_GEO_processed_MM_10X_raw_UMI_count_matrix.txt", stringsAsFactor=F, header=T)
rownames(counts) <- counts[, 1]
counts <- counts[, -1]
mm135 <- CreateSeuratObject(counts = counts, project = "mm135", min.cells = 3, min.features = 200)
mm135[["percent.mt"]] <- PercentageFeatureSet(mm135, pattern = "^MT-")
mm135 <- subset(mm135, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
mm135 <- NormalizeData(mm135 , scale.factor = 1e6, normalization.method = "RC")
mm135 <- FindVariableFeatures(mm135, do.plot = T, nfeatures = 1000)
mm135 <- ScaleData(mm135)
mm135 <- RunPCA(mm135, features = VariableFeatures(object = mm135),npcs = 100)
mm135 <- RunTSNE(mm135, dims.use = 1:10)
DimPlot(pbmc, reduction = "tsne")
FeaturePlot(pbmc, features = c("SDC1", "CD38"))
mm135 <- FindNeighbors(mm135, dims = 1:10)
mm135 <- FindClusters(mm135, resolution = 0.5)
DimPlot(pbmc, reduction = "tsne", label=T)
log.ge <- as.matrix(mm135@assays$RNA@data)
control <- names(Idents(mm135) )[Idents(mm135) %in% c(2,7)]
mm <- names(Idents(mm135) )[Idents(mm135) %in% c(0, 1, 3, 4)]
genes <- rownames(log.ge)
annotation <- generateAnnotation(id_type="hgnc_symbol", genes=genes, centromere=centromere, ishg19 = T)
log.ge <- log.ge[match( annotation$Gene,rownames(log.ge)) , ]
rownames(log.ge) <- annotation$Gene
log.ge <- log2(log.ge +1)
library(Seurat)
library(CaSpER)
data("hg19_cytoband")
# expression data is downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110499
counts  <- read.delim("GSE110499_GEO_processed_MM_10X_raw_UMI_count_matrix.txt", stringsAsFactor=F, header=T)
rownames(counts) <- counts[, 1]
counts <- counts[, -1]
mm135 <- CreateSeuratObject(counts = counts, project = "mm135", min.cells = 3, min.features = 200)
mm135[["percent.mt"]] <- PercentageFeatureSet(mm135, pattern = "^MT-")
mm135 <- subset(mm135, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
mm135 <- NormalizeData(mm135 , scale.factor = 1e6, normalization.method = "RC")
mm135 <- FindVariableFeatures(mm135, do.plot = T, nfeatures = 1000)
mm135 <- ScaleData(mm135)
mm135 <- RunPCA(mm135, features = VariableFeatures(object = mm135),npcs = 100)
mm135 <- RunTSNE(mm135, dims.use = 1:10)
DimPlot(pbmc, reduction = "tsne")
FeaturePlot(pbmc, features = c("SDC1", "CD38"))
mm135 <- FindNeighbors(mm135, dims = 1:10)
mm135 <- FindClusters(mm135, resolution = 0.5)
DimPlot(pbmc, reduction = "tsne", label=T)
log.ge <- as.matrix(mm135@assays$RNA@data)
control <- names(Idents(mm135) )[Idents(mm135) %in% c(2,7)]
mm <- names(Idents(mm135) )[Idents(mm135) %in% c(0, 1, 3, 4)]
Sys.getenv("PATH")
require(devtools)
install_github("akdess/CaSpER")
require(devtools)
install_github("akdess/CaSpER")
require(devtools)
install_github("akdess/CaSpER", force=T)
require(devtools)
install_github("akdess/CaSpER", force=T)
?install_github
require(devtools)
install_github("akdess/CaSpER", force=TRUE
)
require(devtools)
install_github("akdess/CaSpER")
require(devtools)
install_github("akdess/CaSpER")
library(CaSpER)
library(CaSpER)
## "yale_meningioma.rda" contains the following objects:
## data: normalized gene expression matrix
## loh.name.mapping: data.frame for mapping loh files to expression files
## annotation
## control.sample.ids: samples that are used as normal
## samps: sample information
## genoMat: genotyping large scale CNV event summary 1: amplification, -1:deletion, 0: neutral
data("yale_meningioma")
## "hg19_cytoband.rda" contains the following objects:
## cytoband: hg19 cytoband information
## centromere: hg19 centromere information
data("hg19_cytoband")
data <- yale_meningioma$data
loh <-  yale_meningioma$loh
loh.name.mapping <-  yale_meningioma$loh.name.mapping
control.sample.ids <-  yale_meningioma$control.sample.ids
genoMat <-  yale_meningioma$genoMat
samps <-  yale_meningioma$samps
## generate annotation data.frame
#curl -s "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz" | gunzip -c | grep acen | head
annotation <- generateAnnotation(id_type="ensembl_gene_id", genes=rownames(data), ishg19=T, centromere,host="uswest.ensembl.org")
data <- data[match( annotation$Gene,rownames(data)), ]
bject <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=4.5,
annotation=annotation, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=cytoband)
## runCaSpER
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
object <- CreateCasperObject(raw.data=data,loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=4.5,
annotation=annotation, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=cytoband)
## runCaSpER
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
order.sampleNames <- c("MN-1171",  "MN-60835" ,"MN-1236" , "MN-1237" , "MN-1137" , "MN-1161" , "MN-60" ,   "MN-5" )
## plot median filtered gene expression matrix
obj <- final.objects[[9]]
plotHeatmap(object=obj, fileName="heatmap.png",cnv.scale= 3, cluster_cols = F, cluster_rows = T, show_rownames = T, only_soi = T)
plotHeatmap
##  large scale event summary
finalChrMat <- extractLargeScaleEvents (final.objects, thr=0.75)
common <- intersect(order.sampleNames, intersect(rownames(finalChrMat), rownames(genoMat)))
finalChrMat <- finalChrMat[match(common, rownames(finalChrMat)), ]
genoMat <- genoMat[match(common, rownames(genoMat)), ]
## calculate TPR and FPR using genotyping array as gold standard
calcROC(chrMat=finalChrMat, chrMat2=genoMat)
library(CaSpER)
require(Matrix)
library(Seurat)
setwd("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\")
filtered.counts <- Read10X("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\Post_analysis\\filtered_feature_bc_matrix")
postal <- CreateSeuratObject(counts = filtered.counts, min.cells = 3, min.features = 200)
postal[["percent.mt"]] <- PercentageFeatureSet(postal, pattern = "^MT-")
postal <- subset(postal, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
# standard log-normalization
postal <- NormalizeData(postal,  scale.factor = 1e6, normalization.method = "RC")
#postal <- NormalizeData(postal, method="LogNormalize")
# choose ~1k variable features
postal <- FindVariableFeatures(postal)
library(CaSpER)
require(Matrix)
library(Seurat)
setwd("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\")
filtered.counts <- Read10X("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\Post_analysis\\filtered_feature_bc_matrix")
postal <- CreateSeuratObject(counts = filtered.counts, min.cells = 3, min.features = 200)
postal[["percent.mt"]] <- PercentageFeatureSet(postal, pattern = "^MT-")
postal <- subset(postal, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
# standard log-normalization
postal <- NormalizeData(postal,  scale.factor = 1e6, normalization.method = "RC")
#postal <- NormalizeData(postal, method="LogNormalize")
# choose ~1k variable features
postal <- FindVariableFeatures(postal)
# standard scaling (no regression)
postal <- ScaleData(postal)
postal <- RunPCA(postal, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
postal <- RunTSNE(postal, dims = 1:25, method = "FIt-SNE")
postal <- RunUMAP(postal, dims = 1:25, reduction = "pca")
postal <- FindNeighbors(postal, reduction = "pca", dims = 1:20)
postal <- FindClusters(postal, resolution = 0.5)
p2 <- DimPlot(postal, label = TRUE)
p2
http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
data <- as.matrix(postal@assays$RNA@counts)
#data <- log2(data+1)
FeaturePlot(postal, features = "PTPRC", min.cutoff = "q9")
p2
http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
control <- names(Idents(postal) )[Idents(postal) %in% 4]
data <- as.matrix(postal@assays$RNA@counts)
#
annotation <- generateAnnotation(id_type="hgnc_symbol", genes=rownames(data), ishg19=F,
centromere=centromere, host="uswest.ensembl.org")
data <- data[match( annotation$Gene,rownames(data)), ]
# create CaSpER object
cytoband <- read.delim("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\cytoBand.txt", header=F)
cytoband_v2 <- data.frame(V1=gsub("chr", "", cytoband[,1]), V2=cytoband[,2], V3=cytoband[,3], V4="p", stringsAsFactors=F)
cytoband_v2$V4 [grep("q", cytoband$V4)] <- "q"
cytoband_v2 <- cytoband_v2[cytoband_v2$V1 %in% c(1:22, "X", "Y"), ]
cytoband <- cytoband_v2
cytoband <- cytoband[cytoband$V1 %in% c(1:22), ]
cytoband_v2 <- cytoband[,1:4]
cytoband_v2$V4 <-substring(cytoband$V4, 1, 1)
start <-  do.call(rbind, lapply(split(cytoband_v2$V2, paste0(cytoband_v2$V1, cytoband_v2$V4)), min))
end <- do.call(rbind, lapply(split(cytoband_v2$V3, paste0(cytoband_v2$V1, cytoband_v2$V4)), max))
cytoband <- data.frame(V1=gsub("p", "", gsub("q", "",  rownames(start))), V2=start, V3=end, V4=rownames(start), stringsAsFactors=F)
cytoband$V4[grep("q", cytoband$V4)] <- "q"
cytoband$V4[grep("p", cytoband$V4)] <- "p"
cytoband <- cytoband [ as.vector(sapply(c(1:22, "X"), function(x)  which(cytoband$V1 %in% x))), ]
rownames(cytoband) <- NULL
cytoband
cytoband <- cytoband [ as.vector(sapply(c(1:22), function(x)  which(cytoband$V1 %in% x))), ]
rownames(cytoband) <- NULL
http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
clusters <- postal@meta.data$seurat_clusters
#controls <- rownames(postal@meta.data)[postal@meta.data$seurat_clusters %in% c(6,2)]
controls <- rownames(postal@meta.data)[postal@meta.data$seurat_clusters %in% c(4)]
object <- CreateCasperObject(raw.data=data,loh.name.mapping=NULL,
sequencing.type="single-cell",
cnv.scale=3, loh.scale=3, window.length=50, length.iterations=50,
expr.cutoff = 0.1, filter="mean", matrix.type="raw",
annotation=annotation, method="iterative", loh=NULL,
control.sample.ids=controls, cytoband=cytoband)
final.objects <- list()
loh.list <- list()
cnv.list <- list()
message("Performing HMM segmentation...")
for (i in 1:object@cnv.scale) {
print(i)
cnv.list[[i]] <- PerformSegmentationWithHMM(object, cnv.scale = i, removeCentromere = T, cytoband = cytoband)
}
combin <- expand.grid(1:object@cnv.scale, 1:object@loh.scale)
list.names <- apply(combin, 1, function(x) paste(x[1], x[2], sep = "_vs_"))
combin <- expand.grid(1:object@cnv.scale, 1:object@loh.scale)
list.names <- apply(combin, 1, function(x) paste(x[1], x[2], sep = "_vs_"))
for (i in 1:3) {
object <- cnv.list[[i]]
object@segments$states2 <- rep("neut", length(object@segments$state))
object@segments$states2[as.numeric(as.character(object@segments$state)) %in% c(1)] <- "del"
object@segments$states2[as.numeric(as.character(object@segments$state)) %in% c(5)] <- "amp"
final.objects[[i]] <- generateLargeScaleEvents(object)
}
finalChrMat <- extractLargeScaleEvents (final.objects, thr=0.75)
annotation_row = data.frame(ID=rep("",length(rownames(postal@meta.data)) ), stringsAsFactors=F)
rownames(annotation_row) = rownames(postal@meta.data)
annotation_row[postal@meta.data$seurat_clusters %in% c(4),] <- "control"
k<- pheatmap(finalChrMat[,],annotation_row=annotation_row, color = colorRampPalette(rev(brewer.pal(n = 7, name =
"RdBu")))(100), show_rownames=F)
require(Matrix)
library(Seurat)
setwd("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\")
filtered.counts <- Read10X("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\Front_analysis\\filtered_feature_bc_matrix")
frontal <- CreateSeuratObject(counts = filtered.counts, min.cells = 3, min.features = 200)
frontal[["percent.mt"]] <- PercentageFeatureSet(frontal, pattern = "^MT-")
frontal <- subset(frontal, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
# standard log-normalization
frontal <- NormalizeData(frontal, normalization.method = "LogNormalize", scale.factor = 10000)
# choose ~1k variable features
frontal <- FindVariableFeatures(frontal)
# standard scaling (no regression)
frontal <- ScaleData(frontal)
frontal <- RenameCells(object = frontal, add.cell.id = "frontal_")
#VlnPlot(frontal, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
filtered.counts <- Read10X("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\Post_analysis\\filtered_feature_bc_matrix")
postal  <- CreateSeuratObject(counts = filtered.counts, min.cells = 3, min.features = 200)
postal[["percent.mt"]] <- PercentageFeatureSet(postal, pattern = "^MT-")
postal <- subset(postal, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
# standard log-normalization
postal <- NormalizeData(postal, normalization.method = "LogNormalize", scale.factor = 10000)
# choose ~1k variable features
postal <- FindVariableFeatures(postal)
# standard scaling (no regression)
postal <- ScaleData(postal)
postal <- RenameCells(object = postal, add.cell.id = "postal_")
meningioma <- list (postal=postal, frontal=frontal)
anchors <- FindIntegrationAnchors(object.list = meningioma, dims = 1:20)
combined <- IntegrateData(anchorset = anchors, dims = 1:20)
DefaultAssay(combined) <- "integrated"
# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
combined <- RunTSNE(combined, dims = 1:25, method = "FIt-SNE")
combined <- RunUMAP(combined, dims = 1:25, reduction = "pca")
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20)
combined <- FindClusters(combined, resolution = 0.5)
VlnPlot(combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
p2 <- DimPlot(combined, label = TRUE)
region <-  rep("", length(rownames(combined@meta.data)))
names(region) <- rownames(combined@meta.data)
region[grep("postal", names(region)) ] <- "postal"
region[grep("frontal", names(region)) ] <- "frontal"
combined@meta.data$region <- region
#saveRDS(combined, file="combinedForOldR.rds")
#saveRDS(combined, file="combined.rds")
p1 <- DimPlot(combined)
plot_grid(p1, p2)
DimPlot(combined, split.by = "region", label=T)
FeaturePlot(combined, features = "PTPRC", min.cutoff = "q9")
library(CaSpER)
data("hg38_cytoband.rda")
data("hg38_cytoband")
cytoband
cytoband2 <- read.delim("Z:\\SBMI_Houston\\Akdes Serin\\mtx\\cytoBand.txt", header=F)
centromere <- cytoband2[cytoband2$V5=="acen", ]
centromere
rownames(centromere) <- NULL
centromere
save(list=c("cytoband", "centromere
"), file="hg38_cytoband.rda")
save(list=c("cytoband", "centromere
"), file="hg38_cytoband.rda")
save(list=c("cytoband", "centromere"), file="hg38_cytoband.rda")
install_github("akdess/CaSpER")
require(devtools)
install_github("akdess/CaSpER")
require(devtools)
install_github("akdess/CaSpER", force=T)
require(devtools)
install_github("akdess/CaSpER", force=T)