Bug correction and allow pipeline run without aggregation #36

Merged
merged 7 commits into from
Aug 26, 2024
Changes from all commits
8 changes: 4 additions & 4 deletions bin/QC.ipynb
@@ -208,7 +208,7 @@
"metadata": {},
"outputs": [],
"source": [
"if not plots == \"\": \n",
"if plots is not None: \n",
" plots = pd.read_csv(plots, header=None)\n",
" plots = plots.iloc[0,:].values.tolist()\n",
" for plot in plots: \n",
@@ -361,9 +361,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:postprocessing]",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "postprocessing"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -375,7 +375,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
"version": "3.12.2"
}
},
"nbformat": 4,
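For context on the corrected guard: comparing against an empty string still passes when the papermill parameter is left at None, and the notebook then crashes on pd.read_csv(None). A minimal sketch of the new behaviour, assuming plots defaults to None and otherwise points at a one-row CSV of column names (both assumptions, since the notebook's parameters cell is not shown in this diff):

    import pandas as pd

    plots = None  # assumed papermill default when no plot list is supplied

    if plots is not None:
        plots = pd.read_csv(plots, header=None)   # one-row CSV, e.g. "doublet_score,mito_frac"
        plots = plots.iloc[0, :].values.tolist()
        for plot in plots:
            print(f"plotting {plot}")             # stand-in for the notebook's plotting calls
    else:
        print("no extra QC plots requested")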
25 changes: 0 additions & 25 deletions bin/aggregation.py
@@ -1,7 +1,6 @@
import argparse
import scanpy as sc
import numpy as np
from utils import get_basename_without_extension

parser = argparse.ArgumentParser(description="wrapper for concatenating the samples.")
parser.add_argument(
@@ -16,9 +15,6 @@
for idx, h5_file_path in enumerate(args.inputs, start=1):
print(h5_file_path)
adata = sc.read(h5_file_path)
adata.obs["sample_name"] = get_basename_without_extension(h5_file_path).rsplit(
"_", 1
)[0]
adata.obs.index = adata.obs["sample_name"] + "_" + adata.obs.index
adata.var_names_make_unique()
adata_list.append(adata)
@@ -30,25 +26,4 @@
permuted_indices = np.random.permutation(combined_adata.n_obs)
combined_adata = combined_adata[permuted_indices, :]

# Compute QC metrics useful for downstream analysis
combined_adata.var["mito"] = combined_adata.var_names.str.upper().str.startswith(
("MT-")
)
combined_adata.var["ribo"] = combined_adata.var_names.str.upper().str.startswith(
("RPS", "RPL")
)

combined_adata.obs["mito_frac"] = np.sum(
combined_adata[:, combined_adata.var["mito"]].X, axis=1
) / np.sum(combined_adata.X, axis=1)

sc.pp.calculate_qc_metrics(combined_adata, percent_top=[20], inplace=True)
sc.pp.calculate_qc_metrics(
combined_adata,
qc_vars=["ribo", "mito"],
percent_top=None,
log1p=False,
inplace=True,
)

combined_adata.write_h5ad(args.output)
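The concatenation call itself sits between the two hunks above and is untouched by this PR; only the per-sample bookkeeping was removed here (it now happens upstream, see doublet_detection.py below). A minimal sketch of the overall aggregation pattern, assuming anndata's standard concat with an outer join (an assumption, since that line is not part of the diff):

    import anndata as ad
    import numpy as np
    import pandas as pd

    # Tiny stand-ins for the per-sample doublet-detection outputs; their obs names
    # are already prefixed with the sample name upstream.
    a = ad.AnnData(np.ones((2, 3)), obs=pd.DataFrame(index=["sampleA_AAAC", "sampleA_AAAG"]))
    b = ad.AnnData(np.ones((2, 3)), obs=pd.DataFrame(index=["sampleB_AAAC", "sampleB_AAAT"]))

    combined = ad.concat([a, b], join="outer")        # assumed join strategy
    permuted = np.random.permutation(combined.n_obs)  # same shuffle as in the script
    combined = combined[permuted, :]
    print(combined.obs_names.tolist())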
5 changes: 4 additions & 1 deletion bin/doublet_detection.py
@@ -3,7 +3,7 @@
import argparse
import scanpy as sc
import doubletdetection
from utils import anndata_from_h5
from utils import anndata_from_h5, get_basename_without_extension

os.environ["NUMBA_CACHE_DIR"] = "/tmp/"

@@ -98,4 +98,7 @@
adata_batch.obs["doublet"] = doublets.astype(bool)
adata_batch.obs["doublet_score"] = doublet_score

adata_batch.obs["sample_name"] = get_basename_without_extension(args.input_h5).rsplit(
"_", 1
)[0]
adata_batch.write_h5ad(args.output_h5ad)
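get_basename_without_extension comes from bin/utils.py, which is not part of this diff; a plausible sketch of that helper and of how the sample name is recovered from a CellBender output file name (the example file name is an assumption):

    import os

    def get_basename_without_extension(path: str) -> str:
        # Assumed implementation: strip the directory and the final extension.
        return os.path.splitext(os.path.basename(path))[0]

    # "sampleA_cellbender.h5" -> "sampleA_cellbender" -> rsplit("_", 1)[0] -> "sampleA"
    sample_name = get_basename_without_extension("/work/sampleA_cellbender.h5").rsplit("_", 1)[0]
    print(sample_name)  # sampleA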
17 changes: 17 additions & 0 deletions bin/outlier_filter.py
@@ -142,6 +142,23 @@ def designate_outliers(
# Load data
adata = sc.read_h5ad(args.input_h5ad)

# Compute QC metrics useful for downstream analysis
adata.var["mito"] = adata.var_names.str.upper().str.startswith(("MT-"))
adata.var["ribo"] = adata.var_names.str.upper().str.startswith(("RPS", "RPL"))

adata.obs["mito_frac"] = np.sum(adata[:, adata.var["mito"]].X, axis=1) / np.sum(
adata.X, axis=1
)

sc.pp.calculate_qc_metrics(adata, percent_top=[20], inplace=True)
sc.pp.calculate_qc_metrics(
adata,
qc_vars=["ribo", "mito"],
percent_top=None,
log1p=False,
inplace=True,
)

calculate_group_featcount_dist(adata, group_key=args.sample_col)

# Mark outliers
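The block moved into outlier_filter.py flags mitochondrial and ribosomal genes by name and computes a per-cell mitochondrial fraction before scanpy's QC metrics are added. A small self-contained illustration on a toy matrix (gene names are made up for the example; the scanpy calls mirror the ones above, except the percent_top=[20] pass, skipped here because the toy has only four genes):

    import numpy as np
    import pandas as pd
    import scanpy as sc
    import anndata as ad

    # Two cells, four genes: two mitochondrial, one ribosomal, one other.
    X = np.array([[5.0, 5.0, 10.0, 80.0],
                  [1.0, 0.0,  9.0, 90.0]])
    adata = ad.AnnData(X, var=pd.DataFrame(index=["MT-CO1", "MT-ND1", "RPS4", "ACTB"]))

    adata.var["mito"] = adata.var_names.str.upper().str.startswith(("MT-"))
    adata.var["ribo"] = adata.var_names.str.upper().str.startswith(("RPS", "RPL"))
    adata.obs["mito_frac"] = np.sum(adata[:, adata.var["mito"]].X, axis=1) / np.sum(adata.X, axis=1)

    sc.pp.calculate_qc_metrics(adata, qc_vars=["ribo", "mito"], percent_top=None, log1p=False, inplace=True)
    print(adata.obs[["mito_frac", "pct_counts_mito", "pct_counts_ribo"]])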
5 changes: 4 additions & 1 deletion bin/run_celltypist.py
@@ -54,11 +54,14 @@
# Check if the model path exists, if not
if os.path.exists(args.model):
# Load the model
print(f"Found model at {args.models}")
model = models.Model.load(model=args.model)
print(f"Loaded {args.model}")
else:
# Download and download the model
# Download the model
print("Downloading the model...")
models.download_models(model=args.model)
model = args.model
print(f"Downloaded {args.model} at {models.models_path}")

# prediction
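For reference, the corrected branch follows CellTypist's documented pattern: load a model from disk if the given value is a path, otherwise download it by name and let annotate resolve the name. A minimal sketch, with the model name assumed for illustration:

    import os
    import celltypist
    from celltypist import models

    model_arg = "Immune_All_Low.pkl"  # assumed model name, for illustration only

    if os.path.exists(model_arg):
        model = models.Model.load(model=model_arg)   # a path on disk: load it directly
    else:
        models.download_models(model=model_arg)      # fetched into models.models_path
        model = model_arg                            # annotate() also accepts a model name

    # Downstream prediction (the module passes --majority_voting and --use_gpu):
    # predictions = celltypist.annotate(adata, model=model, majority_voting=True)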
31 changes: 20 additions & 11 deletions main.nf
@@ -25,34 +25,43 @@ workflow {
def raw_path = file(row[1])
def filtered_path = row[2]
def demultiplexing = row[3].toLowerCase()
return tuple(name, raw_path, filtered_path, demultiplexing)
def expected_droplets = row[4]
return tuple(name, raw_path, filtered_path, demultiplexing, expected_droplets)
}

// run Cellbender
CELLBENDER(ch_input)

// run DoubletDetection with optional demultiplexing
// run DoubletDetection (Optional: demultiplexing)
DOUBLETDETECTION(CELLBENDER.out.name, CELLBENDER.out.raw_h5, CELLBENDER.out.filtered_path, CELLBENDER.out.demultiplexing)

// aggregate the outputs
AGGREGATION(DOUBLETDETECTION.out.doublet_h5ad.collect())
// Optional: aggregate the outputs
if (params.aggregation) {
AGGREGATION(DOUBLETDETECTION.out.doublet_h5ad.collect())
outlier_input_name = AGGREGATION.out.name
outlier_input = AGGREGATION.out.aggregation_h5ad
} else {
outlier_input_name = DOUBLETDETECTION.out.name
outlier_input = DOUBLETDETECTION.out.doublet_h5ad
}

// filter out outliers
OUTLIER_FILTER(AGGREGATION.out.aggregation_h5ad)
OUTLIER_FILTER(outlier_input_name, outlier_input)

// SCRAN normalization
SCRAN(OUTLIER_FILTER.out.outlier_filtered_h5ad)
SCRAN(OUTLIER_FILTER.out.name, OUTLIER_FILTER.out.outlier_filtered_h5ad)

// SCVI batch correction
SCVI(SCRAN.out.scran_h5ad)
SCVI(SCRAN.out.name, SCRAN.out.scran_h5ad)

// Postprocessing
POSTPROCESSING(SCVI.out.scvi_h5ad)
POSTPROCESSING(SCVI.out.name, SCVI.out.scvi_h5ad)

// Celltypist
CELLTYPIST(POSTPROCESSING.out.postprocessing_scvi_h5ad)
CELLTYPIST(POSTPROCESSING.out.name, POSTPROCESSING.out.postprocessing_scvi_h5ad)

// Report
REPORT(POSTPROCESSING.out.postprocessing_h5ad,
CELLTYPIST.out.celltypist_scvi_h5ad)
REPORT(CELLTYPIST.out.name,
CELLTYPIST.out.postprocessing_h5ad,
CELLTYPIST.out.celltypist_scvi_h5ad)
}
1 change: 1 addition & 0 deletions modules/aggregation/main.nf
@@ -8,6 +8,7 @@ process AGGREGATION {
val doublet_h5ads

output:
val "${params.experiment.name}", emit: name
path "${params.experiment.name ? params.experiment.name + '_' : ''}aggregation.h5ad", emit: aggregation_h5ad

script:
6 changes: 3 additions & 3 deletions modules/cellbender/main.nf
@@ -5,7 +5,7 @@ process CELLBENDER {
publishDir "${params.outdir}/cellbender/", mode: 'copy'

input:
tuple val(name), path(raw_path), val(filtered_path), val(demultiplexing)
tuple val(name), path(raw_path), val(filtered_path), val(demultiplexing), val(expected_droplets)

output:
val "${name}", emit: name
@@ -21,15 +21,15 @@
python ${baseDir}/bin/run_cellbender.py \
${raw_path} \
${name}_cellbender.h5 \
${params.cellbender.total_droplets_included} \
${expected_droplets} \
--filtered ${filtered_path}
"""
else
"""
python ${baseDir}/bin/run_cellbender.py \
${raw_path} \
${name}_cellbender.h5 \
${params.cellbender.total_droplets_included} \
${expected_droplets} \
--filtered ${filtered_path}
"""
}
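The CELLBENDER process now takes the droplet count from the samplesheet (the new fifth column, expected_droplets) instead of the single params.cellbender.total_droplets_included value. bin/run_cellbender.py itself is not part of this diff; a hypothetical sketch of a CLI that matches the invocation above, just to show where the per-sample value lands:

    import argparse

    # Hypothetical argument parser matching the process invocation
    #   run_cellbender.py <raw_h5> <output_h5> <total_droplets> --filtered <path>
    # The real bin/run_cellbender.py is not shown in this PR.
    parser = argparse.ArgumentParser(description="wrapper around cellbender remove-background")
    parser.add_argument("raw_h5", help="raw counts matrix")
    parser.add_argument("output_h5", help="path for the denoised output")
    parser.add_argument("total_droplets", type=int,
                        help="per-sample droplet count, now read from the samplesheet")
    parser.add_argument("--filtered", default=None,
                        help="optional filtered matrix used to estimate expected cells")
    args = parser.parse_args()
    print(args.raw_h5, args.output_h5, args.total_droplets, args.filtered)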
11 changes: 7 additions & 4 deletions modules/celltypist/main.nf
@@ -1,14 +1,17 @@
process CELLTYPIST {
label 'gpus'
container 'quay.io/teichlab/celltypist:latest'
containerOptions '--nv'
containerOptions "--nv --bind ${params.mount}"
publishDir "${params.outdir}/celltypist/", mode: 'copy'

input:
val name
path postprocessing_scvi_h5ad

output:
path "${params.experiment.name}_celltypist_scvi.h5ad", emit: celltypist_scvi_h5ad
val "${name}", emit: name
path "${name}_celltypist_scvi.h5ad", emit: celltypist_scvi_h5ad
path "${postprocessing_scvi_h5ad}", emit: postprocessing_h5ad

script:
def gpu_index = task.index % params.maxForks
@@ -19,7 +22,7 @@ process CELLTYPIST {
export MPLCONFIGDIR=\$PWD
python ${baseDir}/bin/run_celltypist.py \
${postprocessing_scvi_h5ad} \
${params.experiment.name}_celltypist_scvi.h5ad \
${name}_celltypist_scvi.h5ad \
--model ${params.celltypist.model} \
--majority_voting \
--use_gpu \
@@ -31,7 +34,7 @@
export MPLCONFIGDIR=\$PWD
python ${baseDir}/bin/run_celltypist.py \
${postprocessing_scvi_h5ad} \
${params.experiment.name}_celltypist_scvi.h5ad \
${name}_celltypist_scvi.h5ad \
--model ${params.celltypist.model} \
--majority_voting \
--use_gpu \
1 change: 1 addition & 0 deletions modules/doubletdetection/main.nf
@@ -11,6 +11,7 @@ process DOUBLETDETECTION {
val demultiplexing

output:
val "${name}", emit: name
path "${name}_doubletdetection.h5ad", emit: doublet_h5ad

script:
6 changes: 4 additions & 2 deletions modules/outlierfilter/main.nf
@@ -4,16 +4,18 @@ process OUTLIER_FILTER {
publishDir "${params.outdir}/outlier_filtered/", mode: 'copy'

input:
val name
path aggregation_h5ad

output:
path "${params.experiment.name ? params.experiment.name + '_' : ''}outlier_filtered.h5ad", emit: outlier_filtered_h5ad
val "${name}", emit: name
path "${name}_outlier_filtered.h5ad", emit: outlier_filtered_h5ad

script:
"""
export NUMBA_CACHE_DIR=\$PWD
python ${baseDir}/bin/outlier_filter.py \
${aggregation_h5ad} \
${params.experiment.name ? params.experiment.name + '_' : ''}outlier_filtered.h5ad
${name}_outlier_filtered.h5ad
"""
}
10 changes: 6 additions & 4 deletions modules/postprocessing/main.nf
@@ -4,23 +4,25 @@ process POSTPROCESSING {
publishDir "${params.outdir}/postprocessing/", mode: 'copy'

input:
val name
path scvi_h5ad

output:
path "${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing.h5ad", emit: postprocessing_h5ad
path "${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing_scvi.h5ad", emit: postprocessing_scvi_h5ad
val "${name}", emit: name
path "${name}_postprocessing.h5ad", emit: postprocessing_h5ad
path "${name}_postprocessing_scvi.h5ad", emit: postprocessing_scvi_h5ad

script:
"""
export NUMBA_CACHE_DIR=\$PWD
python ${baseDir}/bin/postprocessing.py \
${scvi_h5ad} \
${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing.h5ad \
${name}_postprocessing.h5ad \
--n_pca_components ${params.postprocessing.n_pca_components} \
--metadata ${params.postprocessing.metadata}
python ${baseDir}/bin/postprocessing.py \
${scvi_h5ad} \
${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing_scvi.h5ad \
${name}_postprocessing_scvi.h5ad \
--use_scvi \
--metadata ${params.postprocessing.metadata}
"""
9 changes: 5 additions & 4 deletions modules/report/main.nf
@@ -4,18 +4,19 @@ process REPORT {
publishDir "${params.outdir}/report/", mode: 'copy'

input:
val name
path postprocessing_h5ad
path celltypist_scvi_h5ad

output:
path "${params.experiment.name}_report.ipynb", emit: report_ipynb
path "${params.experiment.name}_report.html", emit: report_html
path "${name}_report.ipynb", emit: report_ipynb
path "${name}_report.html", emit: report_html

script:
"""
export HOME=\$PWD
python -m ipykernel install --user --name postprocessing
papermill ${baseDir}/bin/QC.ipynb ${params.experiment.name}_report.ipynb -p plots ${params.report.plots}
jupyter nbconvert --to html ${params.experiment.name}_report.ipynb
papermill ${baseDir}/bin/QC.ipynb ${name}_report.ipynb -p plots ${params.report.plots}
jupyter nbconvert --to html ${name}_report.ipynb
"""
}
6 changes: 4 additions & 2 deletions modules/scran/main.nf
@@ -4,17 +4,19 @@ process SCRAN {
publishDir "${params.outdir}/scran/", mode: 'copy'

input:
val name
file aggregation_h5ad

output:
path "${params.experiment.name ? params.experiment.name + '_' : ''}scran.h5ad", emit: scran_h5ad
val "${name}", emit: name
path "${name}_scran.h5ad", emit: scran_h5ad

script: // set home directory to cache basilisk
"""
export PATH=/opt/conda/envs/scran/bin/:$PATH
export HOME=\$PWD
Rscript ${baseDir}/bin/scran.R \
${aggregation_h5ad} \
"${params.experiment.name ? params.experiment.name + '_' : ''}scran.h5ad"
"${name}_scran.h5ad"
"""
}
6 changes: 4 additions & 2 deletions modules/scvi/main.nf
@@ -5,17 +5,19 @@ process SCVI {
publishDir "${params.outdir}/scvi/", mode: 'copy'

input:
val name
path scran_h5ad

output:
path "${params.experiment.name ? params.experiment.name + '_' : ''}scvi.h5ad", emit: scvi_h5ad
val "${name}", emit: name
path "${name}_scvi.h5ad", emit: scvi_h5ad

script:
"""
export NUMBA_CACHE_DIR=\$PWD
python ${baseDir}/bin/scvi_norm.py \
${scran_h5ad} \
${params.experiment.name ? params.experiment.name + '_' : ''}scvi.h5ad \
${name}_scvi.h5ad \
--n_latent ${params.scvi.n_latent} \
--n_top_genes ${params.scvi.n_top_genes}
"""