Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding SCTransform as an alternative to anchor transfer-Wilms tumor annotation (SCPCP000014) #836

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/run_cell-type-wilms-tumor-14.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ jobs:
with:
working-directory: ${{ env.MODULE_PATH }}

- name: Initialize zellkonverter environment
run: |
cd ${MODULE_PATH}
Rscript -e "proc <- basilisk::basiliskStart(env = zellkonverter::zellkonverterAnnDataEnv(), testload = 'anndata'); basilisk::basiliskStop(proc)"

# Update this step as needed to download the desired data
- name: Download test data and results
run: |
Expand Down
3 changes: 2 additions & 1 deletion analyses/cell-type-wilms-tumor-14/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
!/scratch/.gitkeep

# Plots for certain steps should not be committed
/plots/01_anchor_transfer_seurat/*.pdf
/plots/01_anchor_transfer_seurat/*.pdf
/plots/01_anchor_transfer_seurat/*/*.pdf
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ dir.create(library_out_dir, showWarnings = FALSE, recursive = TRUE)
# load pre-processed sample objs & anchor transfer results
obj <- SeuratObject::LoadSeuratRds( file.path(path_anal,"scratch","00_preprocessing_rds",paste0(library_id,".rdsSeurat")) )
level <- "compartment"
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", paste0(library_id, "_", level,".csv")) )
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", "RNA", paste0(library_id, "_", level,".csv")) )
obj <- AddMetaData(object = obj, metadata = predictions)
copykat_result <- readr::read_rds( file = file.path(library_out_dir, paste0(library_id, "_copykat_resultobj.rds")) )
copykat_result_noref <- readr::read_rds( file = file.path(library_out_dir, paste0(library_id, "_noref_copykat_resultobj.rds")) )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ dir.create(library_out_dir, showWarnings = FALSE, recursive = TRUE)
# load pre-processed sample objs & anchor transfer results
obj <- SeuratObject::LoadSeuratRds( file.path(path_anal,"scratch","00_preprocessing_rds",paste0(library_id,".rdsSeurat")) )
level <- "compartment"
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", paste0(library_id, "_", level,".csv")) )
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", "RNA", paste0(library_id, "_", level,".csv")) )
obj <- AddMetaData(object = obj, metadata = predictions)
infercnv_result <- readr::read_rds( file = file.path(library_out_dir, "run.final.infercnv_obj") )
# add infercnv output to seurat object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dir.create(library_out_dir, showWarnings = FALSE, recursive = TRUE)
# load pre-processed sample objs & anchor transfer results
obj <- SeuratObject::LoadSeuratRds( file.path(path_anal,"scratch","00_preprocessing_rds",paste0(library,".rdsSeurat")) )
level <- "compartment"
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", paste0(library, "_", level,".csv")) )
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", "RNA", paste0(library_id, "_", level,".csv")) )
JingxuanChen7 marked this conversation as resolved.
Show resolved Hide resolved
obj <- AddMetaData(object = obj, metadata = predictions)

# prepare copykat input matrix & normal cell list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dir.create(library_out_dir, showWarnings = FALSE, recursive = TRUE)
# load pre-processed sample objs & anchor transfer results
obj <- SeuratObject::LoadSeuratRds( file.path(path_anal,"scratch","00_preprocessing_rds",paste0(library,".rdsSeurat")) )
level <- "compartment"
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", paste0(library, "_", level,".csv")) )
predictions <- read.csv( file.path(path_anal, "results", "01_anchor_transfer_seurat", "RNA", paste0(library_id, "_", level,".csv")) )
JingxuanChen7 marked this conversation as resolved.
Show resolved Hide resolved
obj <- AddMetaData(object = obj, metadata = predictions)

# create annotation file for infercnv
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
JingxuanChen7 marked this conversation as resolved.
Show resolved Hide resolved
File renamed without changes
127 changes: 127 additions & 0 deletions analyses/cell-type-wilms-tumor-14/renv.lock
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,22 @@
],
"Hash": "395472c65cd9d606a1a345687102f299"
},
"DelayedMatrixStats": {
"Package": "DelayedMatrixStats",
"Version": "1.26.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"DelayedArray",
"IRanges",
"Matrix",
"MatrixGenerics",
"S4Vectors",
"methods",
"sparseMatrixStats"
],
"Hash": "5d9536664ccddb0eaa68a90afe4ee76e"
},
"Deriv": {
"Package": "Deriv",
"Version": "4.1.6",
Expand Down Expand Up @@ -248,6 +264,29 @@
],
"Hash": "a3c822ef3c124828e25e7a9611beeb50"
},
"HDF5Array": {
"Package": "HDF5Array",
"Version": "1.32.1",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"BiocGenerics",
"DelayedArray",
"IRanges",
"Matrix",
"R",
"Rhdf5lib",
"S4Arrays",
"S4Vectors",
"methods",
"rhdf5",
"rhdf5filters",
"stats",
"tools",
"utils"
],
"Hash": "420012f82591a2a20156ef65d4aa210a"
},
"HiddenMarkov": {
"Package": "HiddenMarkov",
"Version": "1.8-13",
Expand Down Expand Up @@ -531,6 +570,16 @@
],
"Hash": "c232938949fcd8126034419cc529333a"
},
"Rhdf5lib": {
"Package": "Rhdf5lib",
"Version": "1.26.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"R"
],
"Hash": "c92ba8b9a2c5c9ff600a1062a3b7b727"
},
"RhpcBLASctl": {
"Package": "RhpcBLASctl",
"Version": "0.23-42",
Expand Down Expand Up @@ -944,6 +993,21 @@
],
"Hash": "39d6ecdea862d961c3dfe4d4d7c57920"
},
"beachmat": {
"Package": "beachmat",
"Version": "2.20.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"BiocGenerics",
"DelayedArray",
"Matrix",
"Rcpp",
"SparseArray",
"methods"
],
"Hash": "10e94b1bce9070632a40c6b873f8b2d4"
},
"bit": {
"Package": "bit",
"Version": "4.5.0",
Expand Down Expand Up @@ -1753,6 +1817,32 @@
],
"Hash": "a57f0f5dbcfd0d77ad4ff33032f5dc79"
},
"glmGamPoi": {
"Package": "glmGamPoi",
"Version": "1.16.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"BiocGenerics",
"DelayedArray",
"DelayedMatrixStats",
"HDF5Array",
"MatrixGenerics",
"Rcpp",
"RcppArmadillo",
"SingleCellExperiment",
"SummarizedExperiment",
"beachmat",
"matrixStats",
"methods",
"rlang",
"splines",
"stats",
"utils",
"vctrs"
],
"Hash": "21e305cf5faebb13bee698a5a1c4bced"
},
"globals": {
"Package": "globals",
"Version": "0.16.3",
Expand Down Expand Up @@ -2910,6 +3000,29 @@
],
"Hash": "e1a5d04397edc1580c5e0ed1dbdccf76"
},
"rhdf5": {
"Package": "rhdf5",
"Version": "2.48.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"R",
"Rhdf5lib",
"methods",
"rhdf5filters"
],
"Hash": "74d8c5aeb96d090ce8efc9ffd16afa2b"
},
"rhdf5filters": {
"Package": "rhdf5filters",
"Version": "1.16.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"Rhdf5lib"
],
"Hash": "99e15369f8fb17dc188377234de13fc6"
},
"rjags": {
"Package": "rjags",
"Version": "4-16",
Expand Down Expand Up @@ -3231,6 +3344,20 @@
],
"Hash": "ffe1f9e95a4375530747b268f82b5086"
},
"sparseMatrixStats": {
"Package": "sparseMatrixStats",
"Version": "1.16.0",
"Source": "Bioconductor",
"Repository": "Bioconductor 3.19",
"Requirements": [
"Matrix",
"MatrixGenerics",
"Rcpp",
"matrixStats",
"methods"
],
"Hash": "7e500a5a527460ca0406473bdcade286"
},
"spatstat.data": {
"Package": "spatstat.data",
"Version": "3.1-2",
Expand Down
26 changes: 12 additions & 14 deletions analyses/cell-type-wilms-tumor-14/results/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,28 @@ Instead, copy results files to an S3 bucket and add a link to the S3 location in
- On a Lightsail virtual computer, run the following script to sync the results:
```bash
export OPENSCPCA_RESULTS_BUCKET=researcher-009160072044-us-east-2
cd /home/lightsail-user/git/OpenScPCA-analysis
cd /home/lightsail-user/repo/OpenScPCA-analysis
scripts/sync-results.py cell-type-wilms-tumor-14 \
--bucket ${OPENSCPCA_RESULTS_BUCKET}
```
#### 00. Pre-processing the provided SCE objects
No result files.

#### 01. Anchor transfer using Seurat
* The label transfer analysis was performed at two levels: `celltype` and `compartment`.
* Results are uploaded to `s3://researcher-009160072044-us-east-2/cell-type-wilms-tumor-14/results/01_anchor_transfer_seurat`
```
.
├── [sample_id]_celltype.csv
├── [sample_id]_celltype.pdf
├── [sample_id]_compartment.csv
└── [sample_id]_compartment.pdf
```

* The label transfer analysis was performed in two levels: `celltype` and `compartment`.
* `[sample_id]_[level].csv` label transfer result table including cell ID, predicted cell type, along with predicted scores.
* `[sample_id]_[level].pdf` label transfer result plots consisting of 3 pages:
1. UMAP visualization colored by transferred labels and Seurat clusters, as well as a bar plot showing cell type composition of each Seurat cluster.
2. UMAP visualization colored and split by transferred labels.
3. Distribution for max prediction score. Note: predictions with scores < 0.5 would be labeled as "Unknown" in this analysis.

* `[sample_id]_[level].csv` label transfer result table including cell ID, predicted cell type, along with predicted scores.
* Plots are uploaded to `s3://researcher-009160072044-us-east-2/cell-type-wilms-tumor-14/plots/01_anchor_transfer_seurat/`
* `[sample_id]_[level].pdf` label transfer result plots consisting of 3 pages:
1. UMAP visualization colored by transferred labels and Seurat clusters, as well as a bar plot showing cell type composition of each Seurat cluster.
2. UMAP visualization colored and split by transferred labels.
3. Distribution of the max prediction score. Note: predictions with scores < 0.5 are labeled as "Unknown" in this analysis.

* Anchor transfer was performed with two normalization methods in subfolders:
* `results/01_anchor_transfer_seurat/RNA`: Results generated by normalization method `LogNormalize`.
* `results/01_anchor_transfer_seurat/SCT`: Results generated by normalization method `SCTransform`.
#### 02. Curating marker gene lists
TBD

Expand Down
4 changes: 4 additions & 0 deletions analyses/cell-type-wilms-tumor-14/run_cell-type-wilms-14.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ scratch_dir_step="${scratch_dir}/${step_name}" && mkdir -p ${scratch_dir_step}
# Download and process reference data
ref_h5ad="${scratch_dir_step}/Fetal_full_v3.h5ad"
ref_seurat="${scratch_dir_step}/kidneyatlas.rdsSeurat"
ref_seurat_sct="${scratch_dir_step}/kidneyatlas_SCT.rdsSeurat"

if [[ ! -e ${ref_h5ad} ]]; then
ref_url="https://cellgeni.cog.sanger.ac.uk/kidneycellatlas/Fetal_full_v3.h5ad"
Expand All @@ -35,6 +36,7 @@ Rscript scripts/${step_name}.R \
--in_fetal_atlas "${ref_h5ad}" \
--out_fetal_atlas "${ref_seurat}"


## Preprocess data
Rscript scripts/00_preprocessing_rds.R

Expand All @@ -57,4 +59,6 @@ fi
Rscript scripts/01_anchor_transfer_seurat.R \
--reference "${ref_seurat}" \
--metadata "${meta_path}" \
--run_LogNormalize \
--run_SCT \
$TEST_FLAG
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ library(Seurat)
library(ggpubr)
library(zellkonverter)
library(SingleCellExperiment)
library(glmGamPoi)

prepare_fetal_atlas <- function(in_fetal_atlas = in_fetal_atlas,
out_fetal_atlas = out_fetal_atlas,
Expand Down Expand Up @@ -65,10 +66,16 @@ prepare_fetal_atlas <- function(in_fetal_atlas = in_fetal_atlas,
seurat_obj[["RNA"]]@meta.data <- row_metadata
# add metadata from SingleCellExperiment to Seurat
seurat_obj@misc <- S4Vectors::metadata(sce)
# log transform counts

# normalization
options(future.globals.maxSize = 2000 * 1024^2)
# log transform counts using the standard Seurat workflow
seurat_obj <- Seurat::NormalizeData(seurat_obj, normalization.method = "LogNormalize")
seurat_obj <- Seurat::FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 3000)
seurat_obj <- Seurat::ScaleData(seurat_obj)
# normalize with SCTransform
seurat_obj <- Seurat::SCTransform(seurat_obj, conserve.memory = TRUE)

ndims <- 50
seurat_obj <- Seurat::RunPCA(seurat_obj, npcs = ndims)
seurat_obj <- Seurat::FindNeighbors(seurat_obj, dims = 1:ndims)
Expand Down
Loading
Loading