Fix the numba cache issue and other minor bugs #27

Merged · 8 commits · Jul 16, 2024
Changes from all commits
3 changes: 3 additions & 0 deletions bin/demultiplex.py
@@ -1,7 +1,10 @@
+import os
 import scanpy.external as sce
 from utils import anndata_from_h5
 import argparse

+os.environ["NUMBA_CACHE_DIR"] = "/tmp/"
+
 parser = argparse.ArgumentParser(description="Demultiplexing.")
 parser.add_argument("input", help="Paths to the raw h5ad file.")
 parser.add_argument("--output", required=True, help="The output h5ad file path.")
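For context on the cache fix: numba (used internally by scanpy and umap-learn) writes its JIT cache next to the installed package by default, which fails inside read-only Singularity images. A minimal sketch of the idea, assuming a writable /tmp inside the container; the cache path below is illustrative rather than the pipeline's exact value:

    import os

    # numba reads NUMBA_CACHE_DIR when it first compiles and caches a function,
    # so point it at a writable directory before any numba-backed code runs.
    os.environ["NUMBA_CACHE_DIR"] = "/tmp/numba_cache"  # illustrative path
    os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)

    import scanpy.external as sce  # numba-dependent imports can follow safely
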
3 changes: 3 additions & 0 deletions bin/doublet_detection.py
@@ -1,9 +1,12 @@
 #!/usr/bin/env python
+import os
 import argparse
 import scanpy as sc
 import doubletdetection
 from utils import anndata_from_h5

+os.environ["NUMBA_CACHE_DIR"] = "/tmp/"
+
 parser = argparse.ArgumentParser(
     description="wrapper for DoubletDetection for doublet detection from transcriptomic data."
 )
4 changes: 2 additions & 2 deletions bin/postprocessing.py
@@ -74,13 +74,13 @@ def RunPCA(cts, var_threshold, n_components):
     help="Use SCVI latent variable instead of PCA.",
 )
 parser.add_argument(
-    "--metadata", required=False, default="", help="Metadata to be added to obs."
+    "--metadata", required=False, default="None", help="Metadata to be added to obs."
 )
 args = parser.parse_args()

 adata = sc.read_h5ad(args.input_h5ad)

-if not args.metadata == "":
+if not args.metadata == "None":
     metadata = pd.read_csv(args.metadata)
     new_cols = [x for x in metadata.columns if x not in adata.obs.columns]
     intersect_cols = [x for x in metadata.columns if x in adata.obs.columns]
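The sentinel switch from "" to "None" lines up with the metadata: "None" entry in params.yml further down, presumably because an empty string is awkward to forward through Nextflow and shell interpolation, while a non-empty sentinel is easy to pass and test for. A rough, self-contained sketch of the guard, assuming the same argparse setup; the obs merge itself is only hinted at:

    import argparse
    import pandas as pd

    parser = argparse.ArgumentParser()
    parser.add_argument("--metadata", required=False, default="None",
                        help="CSV with metadata to add to obs, or 'None' to skip.")
    args = parser.parse_args([])  # empty argv here, so the sentinel default is used

    if args.metadata != "None":
        metadata = pd.read_csv(args.metadata)
        # ...merge the new columns into adata.obs as bin/postprocessing.py does...
    else:
        print("no metadata supplied; skipping the obs merge")
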
2 changes: 2 additions & 0 deletions bin/scvi_norm.py
@@ -24,6 +24,8 @@
 torch.set_float32_matmul_precision("high")

 adata = sc.read_h5ad(args.input)
+sc.pp.filter_genes(adata, min_cells=1)
+
 adata.layers["X_scran"] = adata.X
 sc.pp.log1p(adata, base=2)

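The new sc.pp.filter_genes(adata, min_cells=1) call drops genes detected in zero cells before normalization; all-zero genes can produce NaNs or errors in downstream normalization and highly-variable-gene selection, which is presumably why the filter was added. A tiny runnable illustration on a toy matrix (not pipeline data):

    import numpy as np
    import anndata as ad
    import scanpy as sc

    # Toy count matrix in which the second gene is detected in no cell at all.
    X = np.array([[1, 0, 3],
                  [2, 0, 0],
                  [0, 0, 5]], dtype=np.float32)
    adata = ad.AnnData(X)

    sc.pp.filter_genes(adata, min_cells=1)  # keep genes detected in at least one cell
    print(adata.shape)  # (3, 2): the all-zero gene has been removed
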
3 changes: 2 additions & 1 deletion modules/aggregation/main.nf
@@ -1,6 +1,7 @@
 process AGGREGATION {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/doubletdetection.sif:latest'
+    containerOptions '--bind /lila:/lila'
     publishDir "${params.outdir}/aggregation/", mode: 'copy'

     input:
4 changes: 2 additions & 2 deletions modules/celltypist/main.nf
@@ -14,7 +14,7 @@ process CELLTYPIST {
     def gpu_index = task.index % params.maxForks
     if(task.executor == 'lsf')
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/run_celltypist.py \
         ${postprocessing_scvi_h5ad} \
         ${params.experiment.name}_celltypist_scvi.h5ad \
@@ -26,7 +26,7 @@ process CELLTYPIST {
     else
     """
     export CUDA_VISIBLE_DEVICES=$gpu_index
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/run_celltypist.py \
         ${postprocessing_scvi_h5ad} \
         ${params.experiment.name}_celltypist_scvi.h5ad \
4 changes: 3 additions & 1 deletion modules/doubletdetection/main.nf
@@ -1,5 +1,5 @@
 process DOUBLETDETECTION {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/doubletdetection.sif:latest'
     containerOptions '--bind /lila:/lila'
     publishDir "${params.outdir}/doubletdetection/", mode: 'copy'
@@ -16,6 +16,7 @@ process DOUBLETDETECTION {
     script:
     if(demultiplexing)
     """
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/demultiplex.py \
         ${cellbender_h5} \
         --output ${name}_hashsolo.h5ad
@@ -26,6 +27,7 @@ process DOUBLETDETECTION {
     """
     else
     """
+    export NUMBA_CACHE_DIR=${params.workDir}
     python ${baseDir}/bin/doublet_detection.py \
         ${cellbender_h5} \
         ${name}_doubletdetection.h5ad \
4 changes: 2 additions & 2 deletions modules/outlierfilter/main.nf
@@ -1,5 +1,5 @@
 process OUTLIER_FILTER {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/postprocessing.sif:latest'
     publishDir "${params.outdir}/outlier_filtered/", mode: 'copy'

@@ -11,7 +11,7 @@ process OUTLIER_FILTER {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/outlier_filter.py \
         ${aggregation_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}outlier_filtered.h5ad
2 changes: 1 addition & 1 deletion modules/postprocessing/main.nf
@@ -12,7 +12,7 @@ process POSTPROCESSING {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/postprocessing.py \
         ${scvi_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}postprocessing.h5ad \
5 changes: 1 addition & 4 deletions modules/report/main.nf
@@ -13,10 +13,7 @@ process REPORT {

     script:
     """
-    if [ ! -d "/tmp/ipykernel" ]; then
-        mkdir -p "/tmp/ipykernel"
-    fi
-    export HOME=/tmp/ipykernel
+    export HOME=${workDir}
     python -m ipykernel install --user --name postprocessing
     papermill ${baseDir}/bin/QC.ipynb ${params.experiment.name}_report.ipynb -p plots ${params.report.plots}
     jupyter nbconvert --to html ${params.experiment.name}_report.ipynb
2 changes: 1 addition & 1 deletion modules/scran/main.nf
@@ -1,5 +1,5 @@
 process SCRAN {
-    label 'process_low'
+    label 'process_medium'
     container 'library://mamie_wang/nf-scrnaseq/scran.sif:latest'
     publishDir "${params.outdir}/scran/", mode: 'copy'

2 changes: 1 addition & 1 deletion modules/scvi/main.nf
@@ -12,7 +12,7 @@ process SCVI {

     script:
     """
-    export NUMBA_CACHE_DIR=/tmp/numba_cache
+    export NUMBA_CACHE_DIR=${workDir}
     python ${baseDir}/bin/scvi_norm.py \
         ${scran_h5ad} \
         ${params.experiment.name ? params.experiment.name + '_' : ''}scvi.h5ad \
9 changes: 2 additions & 7 deletions params.yml
@@ -1,21 +1,16 @@
 samplesheet: "./data/sampleSheet.csv"
-outdir: "../results-test/"
+outdir: "../scratch/"
 experiment:
   name: "toy_data"
 cellbender:
   total_droplets_included: 2000
-aggregation:
-  percent_top: "50,100"
-  total_counts: 1
-  n_genes_by_counts: 1
-  log10GenesPerUMI: 0
-  mito_frac: 1
 scvi:
   n_latent: 10
   n_top_genes: 10
 postprocessing:
   n_pca_components: 3
   n_diffmap_components: 3
+  metadata: "None"
 with_gpu: true
 max_memory: "6.GB"
 max_cpus: 6
2 changes: 1 addition & 1 deletion run_test.sh
@@ -3,7 +3,7 @@ module load gcc/10.2.0
 module load cuda/11.7
 export NXF_SINGULARITY_CACHEDIR="/lila/data/chanjlab/wangm10/work-nf-scrnaseq/singularity/"

-nextflow run ./main.nf -resume -profile singularity -params-file ./params.yml -w "/lila/data/chanjlab/wangm10/work-nf-scrnaseq/"
+nextflow run ./main.nf -resume -profile lilac -params-file ./params.yml -w "/lila/data/chanjlab/wangm10/scratch/"
 # nextflow run ./main.nf -resume 0b2aab78-3380-47e5-bf3a-d2babb788b9f -profile lilac -params-file ./params.yml -w "../work-cellbender-test/"

 # nextflow run ./main.nf -resume -profile singularity -params-file ../oliver_RPMN_2024/oliver_RPMN_2024_params.yml -w "/lila/data/chanjlab/wangm10/work-nf-scrnaseq/"