# PYF-100G数据.txt


library(tidyverse)
library(Seurat)
library(ggplot2)
# ---- Configuration ----
# Directory that receives saved figures, and the sample/project label.
outdir <- "E:/薛琪/1020"
sample <- "PYF"


# ---- Load the count matrix and build the Seurat object ----
matrix_file <- "E:/薛琪/1020/PYF-wta_filtered_feature_bc_matrix"
matrix <- Seurat::Read10X(matrix_file, gene.column = 2) # dgCMatrix
# Keep genes seen in >= 5 cells and cells with >= 50 detected genes.
rds <- CreateSeuratObject(matrix, project = sample, min.cells = 5, min.features = 50)

# ---- Mitochondrial fraction per cell ----
# Read the counts through the accessor instead of reaching into S4 slots
# (`rds@assays$RNA@counts`), so the code does not depend on the internal
# layout of the assay object.
raw_counts <- GetAssayData(rds, assay = "RNA", slot = "counts")
all_genes <- rownames(raw_counts)
# Case-insensitive so both "MT-" and "mt-" prefixed symbols match.
mito.genes <- grep(pattern = "^MT-", x = all_genes, value = TRUE, ignore.case = TRUE)
# drop = FALSE keeps a matrix even when zero or one mitochondrial gene matches.
# The result is a fraction in [0, 1], not a percentage.
percent.mito <- Matrix::colSums(raw_counts[mito.genes, , drop = FALSE]) /
  Matrix::colSums(raw_counts)
rds <- AddMetaData(object = rds, metadata = percent.mito, col.name = "percent.mito")

# Keep cells whose mitochondrial fraction is below 0.2.
rds <- subset(rds, subset = percent.mito < 0.2)
# QC overview; this is drawn AFTER filtering, so it shows retained cells only.
VlnPlot(object = rds, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3, pt.size = 0.1)
# ---- Normalisation and variable-feature selection ----
rds <- NormalizeData(rds, normalization.method = "LogNormalize", scale.factor = 10000)
nfeatures <- 2000

# NOTE(review): the cutoff and mean/dispersion function arguments appear to be
# used only by the "mean.var.plot"/"dispersion" selection methods, not "vst" —
# confirm against the Seurat documentation before relying on them.
rds <- FindVariableFeatures(rds, selection.method = "vst", nfeatures = nfeatures,
                            mean.cutoff = c(0.1, 8), dispersion.cutoff = c(1, Inf),
                            mean.function = ExpMean, dispersion.function = LogVMR)
# Accessor instead of the `@var.features` slot.
use.genes <- VariableFeatures(rds)

# ---- Scaling, PCA, graph-based clustering ----
rds <- ScaleData(rds, vars.to.regress = c("nCount_RNA", "percent.mito"), features = use.genes)
# `do.print` was the Seurat v2 spelling; current RunPCA uses `verbose`.
rds <- RunPCA(object = rds, features = use.genes, verbose = FALSE)
rds <- FindNeighbors(rds, dims = 1:20, force.recalc = TRUE, reduction = "pca")
# Resolution 2 creates the `RNA_snn_res.2` metadata column used for annotation.
rds <- FindClusters(rds, resolution = 2)
p <- DimPlot(rds, reduction = "pca")
p

# ---- UMAP embedding and export ----
rds <- RunUMAP(rds, dims = 1:20)
p <- DimPlot(rds, reduction = "umap", label = TRUE)
p
p_file <- file.path(outdir, paste0(sample, ".umap.eps"))
ggsave(filename = p_file, plot = p, width = 8, height = 6)

# Per-cluster expression of lineage markers, used to annotate clusters by eye.
# NOTE(review): "Cd20" and "Ly6c" look like protein/alias names; the matrix may
# carry these genes under other symbols (e.g. Ms4a1, Ly6c1/Ly6c2) — confirm
# they are present in rownames(rds).
VlnPlot(rds, features = c("Cd20", "Ptprc", "Cd19"))
# "Gsr" was listed twice in the original vector; each marker is requested once.
VlnPlot(rds, features = c("Ly6c", "Gsr", "Cd14", "Itgam", "S100a8"))

# ---- Manual cluster -> cell type annotation ----
# Hand-curated label for each of the 27 clusters (0-26) of the resolution-2
# clustering. Cluster ids are kept as character so the join below does not
# depend on factor level ordering.
cluster_annotation <- data.frame(
  cluster = as.character(0:26),
  celltype = c("MՓ", "MՓ", "MՓ", "Mono", "Tmem", "Mono", "DC", "Tmem", "MՓ",
               "DC", "CD4T", "TEX Int", "TEX Int", "NK", "TEX Term", "TEX Term", "MՓ", "undetermined",
               "TEX Int", "DC", "Mono", "MՓ", "DC", "TEX Term", "MՓ",
               "undetermined", "NK"),
  stringsAsFactors = FALSE
)

# One row per cell with its cluster id from the resolution-2 clustering.
cell_clusters <- data.frame(
  cell_ident = Cells(rds),
  cluster = as.character(rds$RNA_snn_res.2),
  stringsAsFactors = FALSE
)

# Clustering is re-run upstream, so the number of clusters can drift away from
# the hard-coded table; surface that instead of silently producing NA labels.
unannotated <- setdiff(unique(cell_clusters$cluster), cluster_annotation$cluster)
if (length(unannotated) > 0) {
  warning(
    "Clusters without a cell type annotation: ",
    paste(unannotated, collapse = ", "),
    call. = FALSE
  )
}

celltype_df <- left_join(cell_clusters, cluster_annotation, by = "cluster")

# AddMetaData matches rows to cells by row name.
rownames(celltype_df) <- celltype_df$cell_ident
rds <- AddMetaData(rds, metadata = celltype_df["celltype"])

# ---- Sample-tag assignment per cell ----
tag_file <- "E:/薛琪/1020/PYF-TAG_umi_tag.tsv"
# NOTE(review): AddMetaData below matches on row names, so this assumes
# read.table picks up the cell barcodes as row names — confirm on the file.
tag_df <- read.table(tag_file, header = TRUE)
head(tag_df)
if (!"tag" %in% colnames(tag_df)) {
  stop("Column 'tag' not found in ", tag_file, call. = FALSE)
}
rds <- AddMetaData(rds, tag_df)

# Keep only cells assigned to one of the four sample tags.
rds_tag <- subset(rds, subset = tag %in% c("tag1", "tag3", "tag4", "tag5"))

# Per-cell table (cell id, cell type, tag columns) for the retained cells.
tag_df$cell_ident <- rownames(tag_df)
tag_df <- data.frame(cell_ident = Cells(rds_tag), celltype = rds_tag$celltype) %>%
  left_join(tag_df, by = "cell_ident")
rownames(tag_df) <- tag_df$cell_ident
tag_df$tag <- factor(tag_df$tag, levels = c("Multiplet", "tag1", "tag3", "tag4", "tag5", "Undetermined"))

table(tag_df$tag)

# Stacked bar chart: cell type composition (as proportions) within each tag.
tag_barplot <- ggplot(tag_df %>% filter(!is.na(tag)), aes(x = tag, fill = celltype)) +
  geom_bar(position = "fill", width = 0.5) +
  #scale_fill_manual(values = c("SlateBlue3","DarkOrange","GreenYellow","Purple"))+
  theme_bw()
tag_barplot
# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggsave("PYF tag_df1023.jpg", plot = tag_barplot, width = 15, height = 10)
ggsave("PYF tag_df1023.jpg.eps", plot = tag_barplot, width = 15, height = 10)

# ---- Share of each cell type that carries each tag ----
# Total number of cells per cell type.
count_celltype_df <- tag_df %>%
  group_by(celltype) %>%
  summarise(n_celltype = n())

# Cells per (cell type, tag), divided by the cell type total.
gdf <- tag_df %>%
  group_by(celltype, tag) %>%
  summarise(n_celltype_tag = n()) %>%
  ungroup() %>% # do not leave the result grouped by celltype
  left_join(count_celltype_df, by = "celltype") %>%
  mutate(ratio = round(n_celltype_tag / n_celltype, 4))
head(gdf)


# Written to the working directory; the file name has no extension.
write.csv(gdf, file = "pyf-gdf")

# UMAP of the tagged cells, coloured by annotated cell type, one panel per tag.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
ann_umap <- DimPlot(rds_tag, reduction = "umap", label = TRUE, group.by = "celltype", split.by = "tag")
ggsave("ann_umap1011.eps", plot = ann_umap, width = 16, height = 6)
ggsave("ann_umap1011.jpg", plot = ann_umap, width = 20, height = 6)

# ---- Lineage-specific objects carved out of the tagged cells ----
# Single-lineage subsets first, then the combined T-cell object.
rds_NK <- subset(x = rds_tag, subset = celltype %in% c("NK"))
rds_CD4 <- subset(x = rds_tag, subset = celltype %in% c("CD4T"))
rds_CD8 <- subset(x = rds_tag, subset = celltype %in% c("TEX Int", "TEX Term", "Tmem"))
# All T cells: the CD4 label plus the three labels used for rds_CD8.
rds_T <- subset(x = rds_tag, subset = celltype %in% c("CD4T", "TEX Int", "TEX Term", "Tmem"))

# ---- Re-cluster the T-cell subset ----
# Reuses `use.genes` (the variable features chosen on the full dataset).
# `do.print` was the Seurat v2 spelling; current RunPCA uses `verbose`.
rds_T <- RunPCA(object = rds_T, features = use.genes, verbose = FALSE)
rds_T <- FindNeighbors(rds_T, dims = 1:20, force.recalc = TRUE, reduction = "pca")
rds_T <- FindClusters(rds_T, resolution = 0.5)

p <- DimPlot(rds_T, reduction = "pca")
p


rds_T <- RunUMAP(rds_T, dims = 1:20)
p <- DimPlot(rds_T, reduction = "umap", label = TRUE)
p


# T-cell UMAP coloured by the new clusters, one panel per tag.
ann_umap <- DimPlot(rds_T, reduction = "umap", label = TRUE, split.by = "tag")
ggsave("ann_umap1023-T.eps", plot = ann_umap, width = 16, height = 6)
ggsave("ann_umap1023-T.jpg", plot = ann_umap, width = 20, height = 6)

# ---- Marker panels on the re-clustered T cells ----
# The original calls referenced `rds_T1`, which is never created in this
# script; `rds_T` is the re-clustered T-cell object built above.
# NOTE(review): if `rds_T1` was a different object from an interactive
# session, restore it explicitly before these calls.

# The empty-string feature "" was removed from this panel.
VlnPlot(rds_T, features = c("Il7r", "Lef1", "Sell", "Cd44"))

# " Itgb1" had a leading space, so it could not match a gene name. This call
# appeared twice verbatim in the original and is kept once.
VlnPlot(rds_T, features = c("Cxcr3", "S1pr1", "Klf2", "Itgb1", "Cd44", "Ly6c2"))

VlnPlot(rds_T, features = c("Gzma", "Gzmb", "Gzmc", "Gzmf", "Ifng", "Klrg1", "Cx3cr1"))

# "Ctla4" was listed twice in the original vector.
VlnPlot(rds_T, features = c("Havcr2", "Lag3", "Entpd1", "Cd38", "Tox", "Ctla4"))

VlnPlot(rds_T, features = c("Mki67", "Top2a"))

# NOTE(review): "Tcf1" may not be a gene symbol in this matrix (the TCF-1
# protein is commonly listed under Tcf7) — confirm it is in rownames(rds_T).
VlnPlot(rds_T, features = c("Tcf1", "Tcf7", "Cxcr5"))


# ---- Single-gene expression on the embedding, one panel per tag ----
# Nkg7 is shown on the NK subset only.
gene <- "Nkg7"
FeaturePlot(object = rds_NK, features = gene, split.by = "tag")

# Btla and Eomes are shown on all tagged cells.
gene <- "Btla"
FeaturePlot(object = rds_tag, features = gene, split.by = "tag")

gene <- "Eomes"
FeaturePlot(object = rds_tag, features = gene, split.by = "tag")


library(readr)
library(dplyr)
library(cowplot)
library(ggplot2)
library('RColorBrewer')
library(Seurat)
library(readxl)
library(pheatmap)

#' Heatmap of per-tag average expression for a gene list stored in Excel.
#'
#' Reads the `GENE` column of `gene_file`, averages expression of those genes
#' per `tag` group in `object`, and draws a row-scaled heatmap.
#'
#' @param object A Seurat object with a `tag` metadata column.
#' @param gene_file Path to an Excel workbook with a `GENE` column.
#' @return The full data frame of average expression (genes x tags), invisibly.
plot_tag_heatmap <- function(object, gene_file) {
  gene_table <- read_excel(gene_file)
  if (!"GENE" %in% colnames(gene_table)) {
    stop("Column 'GENE' not found in ", gene_file, call. = FALSE)
  }
  # Blank spreadsheet cells come back as NA; repeated genes add nothing.
  genes <- unique(gene_table$GENE[!is.na(gene_table$GENE)])

  avg_expr <- as.data.frame(
    AverageExpression(object, features = genes, group.by = "tag")
  )

  # Row scaling divides by the row standard deviation, so a gene with the same
  # average in every tag (e.g. all zeros) becomes NaN and breaks the row
  # clustering. Leave such rows out of the plot only.
  row_sd <- apply(as.matrix(avg_expr), 1, stats::sd)
  is_constant <- is.na(row_sd) | row_sd == 0
  if (any(is_constant)) {
    message(
      "Dropping constant rows from heatmap: ",
      paste(rownames(avg_expr)[is_constant], collapse = ", ")
    )
  }
  to_plot <- avg_expr[!is_constant, , drop = FALSE]

  pheatmap(to_plot, scale = "row", cluster_cols = FALSE,
           color = colorRampPalette(c("navy", "white", "firebrick3"))(50))
  invisible(avg_expr)
}

# Migration genes in CD4 T cells.
Tgene <- plot_tag_heatmap(rds_CD4, "Migration.xlsx")
head(Tgene)


# Differentiation genes in NK cells.
Tgene <- plot_tag_heatmap(rds_NK, "differentiation.xlsx")
head(Tgene)

# Reload the object from disk and rebuild the tag / lineage subsets.
# NOTE(review): nothing in the visible script writes "PYF.rds" — confirm the
# file exists and matches the object built above.
rds <- readRDS("PYF.rds")
rds_tag <- subset(rds, subset = tag %in% c("tag1", "tag3", "tag4", "tag5"))
rds_T <- subset(rds_tag, subset = celltype %in% c("CD4T", "TEX Int", "TEX Term", "Tmem"))
rds_CD8 <- subset(rds_tag, subset = celltype %in% c("TEX Int", "TEX Term", "Tmem"))
rds_CD4 <- subset(rds_tag, subset = celltype %in% c("CD4T"))
rds_NK <- subset(rds_tag, subset = celltype %in% c("NK"))

# Transcription factors in NK cells.
Tgene <- plot_tag_heatmap(rds_NK, "Transcription factors_nk.xlsx")
head(Tgene)