Fix the numba cache issue and other minor bugs #27

Merged · 8 commits · Jul 16, 2024
Changes from all commits
3 changes: 3 additions & 0 deletions bin/demultiplex.py
@@ -1,7 +1,10 @@
+import os
 import scanpy.external as sce
 from utils import anndata_from_h5
 import argparse

+os.environ["NUMBA_CACHE_DIR"] = "/tmp/"
+
 parser = argparse.ArgumentParser(description="Demultiplexing.")
 parser.add_argument("input", help="Paths to the raw h5ad file.")
 parser.add_argument("--output", required=True, help="The output h5ad file path.")
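For context on the cache fix: numba (used internally by scanpy and umap-learn) writes its JIT cache next to the installed package by default, which fails inside read-only Singularity images. A minimal sketch of the idea, assuming a writable /tmp inside the container; the cache path below is illustrative rather than the pipeline's exact value:

    import os

    # numba reads NUMBA_CACHE_DIR when it first compiles and caches a function,
    # so point it at a writable directory before any numba-backed code runs.
    os.environ["NUMBA_CACHE_DIR"] = "/tmp/numba_cache"  # illustrative path
    os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)

    import scanpy.external as sce  # numba-dependent imports can follow safely
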
3 changes: 3 additions & 0 deletions bin/doublet_detection.py
@@ -1,9 +1,12 @@
 #!/usr/bin/env python
+import os
 import argparse
 import scanpy as sc
 import doubletdetection
 from utils import anndata_from_h5

+os.environ["NUMBA_CACHE_DIR"] = "/tmp/"
+
 parser = argparse.ArgumentParser(
     description="wrapper for DoubletDetection for doublet detection from transcriptomic data."
 )
4 changes: 2 additions & 2 deletions bin/postprocessing.py
@@ -74,13 +74,13 @@ def RunPCA(cts, var_threshold, n_components):
     help="Use SCVI latent variable instead of PCA.",
 )
 parser.add_argument(
-    "--metadata", required=False, default="", help="Metadata to be added to obs."
+    "--metadata", required=False, default="None", help="Metadata to be added to obs."
 )
 args = parser.parse_args()

 adata = sc.read_h5ad(args.input_h5ad)

-if not args.metadata == "":
+if not args.metadata == "None":
     metadata = pd.read_csv(args.metadata)
     new_cols = [x for x in metadata.columns if x not in adata.obs.columns]
     intersect_cols = [x for x in metadata.columns if x in adata.obs.columns]
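The sentinel switch from "" to "None" lines up with the metadata: "None" entry in params.yml further down, presumably because an empty string is awkward to forward through Nextflow and shell interpolation, while a non-empty sentinel is easy to pass and test for. A rough, self-contained sketch of the guard, assuming the same argparse setup; the obs merge itself is only hinted at:

    import argparse
    import pandas as pd

    parser = argparse.ArgumentParser()
    parser.add_argument("--metadata", required=False, default="None",
                        help="CSV with metadata to add to obs, or 'None' to skip.")
    args = parser.parse_args([])  # empty argv here, so the sentinel default is used

    if args.metadata != "None":
        metadata = pd.read_csv(args.metadata)
        # ...merge the new columns into adata.obs as bin/postprocessing.py does...
    else:
        print("no metadata supplied; skipping the obs merge")
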
2 changes: 2 additions & 0 deletions bin/scvi_norm.py
@@ -24,6 +24,8 @@
 torch.set_float32_matmul_precision("high")

 adata = sc.read_h5ad(args.input)
+sc.pp.filter_genes(adata, min_cells=1)
+
 adata.layers["X_scran"] = adata.X
 sc.pp.log1p(adata, base=2)

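The new sc.pp.filter_genes(adata, min_cells=1) call drops genes detected in zero cells before normalization; all-zero genes can produce NaNs or errors in downstream normalization and highly-variable-gene selection, which is presumably why the filter was added. A tiny runnable illustration on a toy matrix (not pipeline data):

    import numpy as np
    import anndata as ad
    import scanpy as sc

    # Toy count matrix in which the second gene is detected in no cell at all.
    X = np.array([[1, 0, 3],
                  [2, 0, 0],
                  [0, 0, 5]], dtype=np.float32)
    adata = ad.AnnData(X)

    sc.pp.filter_genes(adata, min_cells=1)  # keep genes detected in at least one cell
    print(adata.shape)  # (3, 2): the all-zero gene has been removed
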
3 changes: 2 additions & 1 deletion modules/aggregation/main.nf
@@ -1,6 +1,7 @@
 process AGGREGATION {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/doubletdetection.sif:latest'
+    containerOptions '--bind /lila:/lila'
     publishDir "${params.outdir}/aggregation/", mode: 'copy'

     input:
4 changes: 2 additions & 2 deletions modules/celltypist/main.nf
@@ -14,7 +14,7 @@ process CELLTYPIST {
     def gpu_index = task.index % params.maxForks
     if(task.executor == 'lsf')
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/run_celltypist.py \
         ${postprocessing_scvi_h5ad} \
         ${params.experiment.name}_celltypist_scvi.h5ad \
@@ -26,7 +26,7 @@ process CELLTYPIST {
     else
     """
     export CUDA_VISIBLE_DEVICES=$gpu_index
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/run_celltypist.py \
         ${postprocessing_scvi_h5ad} \
         ${params.experiment.name}_celltypist_scvi.h5ad \
4 changes: 3 additions & 1 deletion modules/doubletdetection/main.nf
@@ -1,5 +1,5 @@
 process DOUBLETDETECTION {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/doubletdetection.sif:latest'
     containerOptions '--bind /lila:/lila'
     publishDir "${params.outdir}/doubletdetection/", mode: 'copy'
@@ -16,6 +16,7 @@ process DOUBLETDETECTION {
     script:
     if(demultiplexing)
     """
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/demultiplex.py \
         ${cellbender_h5} \
         --output ${name}_hashsolo.h5ad
@@ -26,6 +27,7 @@ process DOUBLETDETECTION {
     """
     else
     """
+    export NUMBA_CACHE_DIR=${params.workDir}
     python ${baseDir}/bin/doublet_detection.py \
         ${cellbender_h5} \
         ${name}_doubletdetection.h5ad \
4 changes: 2 additions & 2 deletions modules/outlierfilter/main.nf
@@ -1,5 +1,5 @@
 process OUTLIER_FILTER {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/postprocessing.sif:latest'
     publishDir "${params.outdir}/outlier_filtered/", mode: 'copy'

@@ -11,7 +11,7 @@ process OUTLIER_FILTER {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/outlier_filter.py \
         ${aggregation_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}outlier_filtered.h5ad
2 changes: 1 addition & 1 deletion modules/postprocessing/main.nf
@@ -12,7 +12,7 @@ process POSTPROCESSING {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/postprocessing.py \
         ${scvi_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing.h5ad \
5 changes: 1 addition & 4 deletions modules/report/main.nf
@@ -13,10 +13,7 @@ process REPORT {

     script:
     """
-    if [ ! -d "/tmp/ipykernel" ]; then
-        mkdir -p "/tmp/ipykernel"
-    fi
-    export HOME=/tmp/ipykernel
+    export HOME=${workDir}
     python -m ipykernel install --user --name postprocessing
     papermill ${baseDir}/bin/QC.ipynb ${params.experiment.name}_report.ipynb -p plots ${params.report.plots}
     jupyter nbconvert --to html ${params.experiment.name}_report.ipynb
2 changes: 1 addition & 1 deletion modules/scran/main.nf
@@ -1,5 +1,5 @@
 process SCRAN {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/scran.sif:latest'
     publishDir "${params.outdir}/scran/", mode: 'copy'

2 changes: 1 addition & 1 deletion modules/scvi/main.nf
@@ -12,7 +12,7 @@ process SCVI {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/scvi_norm.py \
         ${scran_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}scvi.h5ad \
9 changes: 2 additions & 7 deletions params.yml
@@ -1,21 +1,16 @@
 samplesheet: "./data/sampleSheet.csv"
-outdir: "../results-test/"
+outdir: "../scratch/"
 experiment:
   name: "toy_data"
 cellbender:
   total_droplets_included: 2000
-aggregation:
-  percent_top: "50,100"
-  total_counts: 1
-  n_genes_by_counts: 1
-  log10GenesPerUMI: 0
-  mito_frac: 1
 scvi:
   n_latent: 10
   n_top_genes: 10
 postprocessing:
   n_pca_components: 3
   n_diffmap_components: 3
+  metadata: "None"
 with_gpu: true
 max_memory: "6.GB"
 max_cpus: 6
2 changes: 1 addition & 1 deletion run_test.sh
@@ -3,7 +3,7 @@ module load gcc/10.2.0
 module load cuda/11.7
 export NXF_SINGULARITY_CACHEDIR="/lila/data/chanjlab/wangm10/work-nf-scrnaseq/singularity/"

-nextflow run ./main.nf -resume -profile singularity -params-file ./params.yml -w "/lila/data/chanjlab/wangm10/work-nf-scrnaseq/"
+nextflow run ./main.nf -resume -profile lilac -params-file ./params.yml -w "/lila/data/chanjlab/wangm10/scratch/"
 # nextflow run ./main.nf -resume 0b2aab78-3380-47e5-bf3a-d2babb788b9f -profile lilac -params-file ./params.yml -w "../work-cellbender-test/"

 # nextflow run ./main.nf -resume -profile singularity -params-file ../oliver_RPMN_2024/oliver_RPMN_2024_params.yml -w "/lila/data/chanjlab/wangm10/work-nf-scrnaseq/"