scPDtools provides several comprehensive set of tools for visulization to assist python in single-cell analysis
- High-quality data visualization methods.
- Differntial gene expression analysis
- Compositional analysis
- Gene set enrichment and pathway analysis
- Some R tools in inferring trajectories and Visulization
The functions in the SCP package are all developed around the Seurat object and are compatible with other Seurat functions.
- If you only have data in the form of AnnData, you can refer to
./step0.R
for conversion
scPDtools refers to the following packages
- R >= 4.2.0
You can install the latest version of scPDtools from GitHub with:
if (!require("devtools", quietly = TRUE)) {
install.packages("devtools")
}
devtools::install_github("pigudog/scPDtools")
# 1. Anndata -> Seurat (V4)
library(scPDtools)
# AnnData to Seurat
h5ad_file = './data/data_M_annotation.h5ad'
convertFormat(h5ad_file, from="anndata", to="seurat",main_layer = "counts_log1p",
outFile='data/data_M_annotation.rds')
library(Seurat)
adata = readRDS("./data/data_M_annotation.rds")
save(adata,file="./data/data_M_annotation.rda")
output:
The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, will retire in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
The sp package is now running under evolution status 2
(status 2 uses the sf package in place of rgdal)
layers.`log1p` -> data; layers.`counts` -> counts
Let's take windows running as an example
- We set the number of parallel processes to 8
library(scPDtools)
library(Seurat)
library(BiocParallel)
param <- bpparam()
bpworkers(param) <- 8##设置并行的进程数
BiocParallel::bpparam()
# class: SnowParam
# bpisup: FALSE; bpnworkers: 8; bptasks: 0; bpjobname: BPJOB
# bplog: FALSE; bpthreshold: INFO; bpstopOnError: TRUE
# bpRNGseed: ; bptimeout: NA; bpprogressbar: FALSE
# bpexportglobals: TRUE; bpexportvariables: TRUE; bpforceGC: FALSE
# bpfallback: TRUE
# bplogdir: NA
# bpresultdir: NA
# cluster type: SOCK
# load data
data("mac_menstrual")
# mac_scp = NormalizeData(mac_scp)
mac_scp@meta.data$CellType = mac_scp@meta.data$`cell type`
mac_scp@meta.data$Binary_Stage <- mac_scp@meta.data$`Binary Stage`
Idents(mac_scp) = mac_scp@meta.data$CellType
We have built-in pallette called 'ov_pallette'
- To correspond to our pallette used in python
######################################################
# color主要是使用RdBu和RdPu和ov_pallette
colnames(mac_scp@meta.data)
# 1.show
## normal
CellDimPlot(
srt = mac_scp, group.by = c("CellType", "Stage"),
pt.size = 3,palette = "ov_palette",
reduction = "UMAP",
theme_use = "theme_blank",
subtitle = "celltype and stage"
)
We can set the 'stat.by' parameter to achieve the composition calculation, which can more intuitively see the cell composition in different periods
## stage and celldimplot
CellDimPlot(
srt = mac_scp, group.by = "CellType",
pt.size = 5,palette = "ov_palette",
stat.by = "Stage",
reduction = "UMAP",
theme_use = "theme_blank"
)
## feature
FeatureDimPlot(
srt = mac_scp, features = c("FOLR2", "SPARCL1", "SPP1", "FCN1"),
reduction = "UMAP", theme_use = "theme_blank",
palette = "RdPu",pt.size = 3
)
FeatureDimPlot(
srt = mac_scp, features = c("FOLR2", "SPARCL1", "SPP1", "FCN1"),
reduction = "UMAP", theme_use = "theme_blank",
palette = "RdBu",pt.size = 3
)
FeatureDimPlot(
srt = mac_scp, features = c("FOLR2", "SPARCL1", "SPP1", "FCN1"),
compare_features = TRUE,
label = TRUE,
label_insitu = TRUE,
reduction = "UMAP",
theme_use = "theme_blank",
pt.size = 5,
palette = "ov_palette",
)
I prefer the 'GroupHeatmap()' function
## groupheatmap!!
library(dplyr)
ht <- GroupHeatmap(
srt = mac_scp,
features = c(
"FCN1", "EREG", # MONO
"FOLR2", "SEPP1", # FOLR2
"APOC1", "APOE","SPP1", # APOC1
"SPARCL1", "CALD1"# SPARCL1
),
group.by = c("CellType", "Stage"),
heatmap_palette = "YlOrRd",
cell_annotation = c("Stage", "Binary_Stage"),
cell_annotation_palette = c("Dark2", "Paired"),
show_row_names = TRUE, row_names_side = "left",
add_dot = TRUE, add_reticle = TRUE,
libsize = 1
)
print(ht$plot)
# 2. findmarkers
## findallmarkers in Seurat
markers = FindAllMarkers(mac_scp)
markers$group1 = markers$cluster
mac_scp@tools$DEtest_CellType$AllMarkers_seurat = markers
## findmarker is used in RunDEtest
# you need to understand findmarkers,findallmarkes,FindConservedMarkers
# note: ident.2 means the group for compare
mac_scp <- RunDEtest(srt = mac_scp,
group_by = "CellType",
fc.threshold = 1,
only.pos = FALSE,
BPPARAM = BiocParallel::bpparam())
# visualization
VolcanoPlot(srt = mac_scp, group_by = "CellType",
test.use = "seurat",
DE_threshold = "avg_log2FC > 0.2 & p_val_adj < 0.05",
palette = "RdBu",
palcolor = NULL)
VolcanoPlot(srt = mac_scp, group_by = "CellType",
test.use = "wilcox",
DE_threshold = "avg_log2FC > 0 & p_val_adj < 0.05",
palette = "RdBu",
palcolor = NULL)
findallmarker
seems to get fewer genes when trying, so it is recommended to use findmarker
in DEtest
or adjust the parameters in findallmarkers
DEGs <- mac_scp@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Annotate features with transcription factors and surface proteins
mac_scp <- AnnotateFeatures(mac_scp,
species = "Homo_sapiens", # Homo_sapiens,Mus_musculus
db = c("TF","CSPA"))
table(mac_scp@assays[["RNA"]]@meta.features[["CSPA"]])
table(mac_scp@assays[["RNA"]]@meta.features[["TF"]])
table(mac_scp@assays[["RNA"]]@meta.features[["highly_variable_genes"]])
ht <- FeatureHeatmap(
srt = mac_scp, group.by = "CellType",
features = DEGs$gene,
feature_split = DEGs$group1,
species = "Homo_sapiens",
db = c("GO_BP", "KEGG", "WikiPathway"),
anno_terms = TRUE,
feature_annotation = c("TF", "CSPA"),
feature_annotation_palcolor = list(c("gold", "steelblue"),c("forestgreen")),
height = 5, width = 4,
libsize = 1
)
print(ht$plot)
# enrichment
mac_scp <- RunEnrichment(
srt = mac_scp,
test.use = "wilcox",
group_by = "CellType",
db = c("MSigDB","GO_BP", "KEGG", "WikiPathway"), # c("GO_BP","KEGG", "WikiPathway", "Reactome", "PFAM", "MP","MSigDB", "MSigDB_MH")
species = "Homo_sapiens",
DE_threshold = "avg_log2FC > log2(0.5) & p_val_adj < 1"
)
table(mac_scp@meta.data$CellType)
EnrichmentPlot(
srt = mac_scp,
db = c("MSigDB"),
group_by = "CellType",
group_use = c("FOLR2+ Mac" ,"SPARCL1+ Mac" ),# , "SPP1+ Mac"
plot_type = "bar",
topTerm = 10,
padjustCutoff = 1
)
EnrichmentPlot(
srt = mac_scp, group_by = "CellType", group_use = c("FOLR2+ Mac" ,"SPARCL1+ Mac" , "SPP1+ Mac" ),
plot_type = "bar"
)
mac_scp <- RunGSEA(
srt = mac_scp, group_by = "CellType", db = "GO_BP", species = "Homo_sapiens",
DE_threshold = "p_val_adj < 0.05"
)
GSEAPlot(srt = mac_scp, group_by = "CellType", group_use = "SPP1+ Mac", id_use = "GO:0007186")
# slingshot
mac_scp <- RunSlingshot(srt = mac_scp, group.by = "CellType", reduction = "UMAP")
# FeatureDimPlot(mac_scp, features = paste0("Lineage", 1:3), reduction = "UMAP", theme_use = "theme_blank")
FeatureDimPlot(mac_scp, features = c("Lineage1","palantir_pseudotime","palantir_entropy"),
reduction = "UMAP", theme_use = "theme_blank",pt.size = 3)
CellDimPlot(mac_scp, group.by = "CellType", reduction = "UMAP",
lineages = c("Lineage1","palantir_pseudotime","palantir_entropy"),
lineages_span = 1)
mac_scp <- RunDynamicFeatures(srt = mac_scp, lineages = c("Lineage1","palantir_pseudotime"), n_candidates = 200)
ht <- DynamicHeatmap(
srt = mac_scp,
lineages = c("Lineage1","palantir_pseudotime"),
use_fitted = TRUE, n_split = 6,
reverse_ht = "palantir_pseudotime",
species = "Homo_sapiens",
db = "MSigDB",
anno_terms = TRUE,
anno_keys = TRUE,
anno_features = TRUE,
heatmap_palette = "RdPu",
cell_annotation = "CellType",
# separate_annotation = list("CellType", c("SPP1+ Mac", "FOLR2+ Mac" )),
# separate_annotation_palette = c("Paired", "Set1"),
feature_annotation = c("TF", "CSPA"),
feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
pseudotime_label = 25, pseudotime_label_color = "red",
height = 5, width = 2
)
print(ht$plot)