Merge pull request #130 from fungenomics/dev

moved ontology processing to separate rule (#129)
fungenomics · Jul 17, 2024 · 83f09d2 · 83f09d2
2 parents dd44c0e + 7952035
commit 83f09d2
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 28 deletions.
diff --git a/Notebooks/annotate_report.Rmd b/Notebooks/annotate_report.Rmd
@@ -103,7 +103,7 @@ query = query %>%
 cluster_pal = create_color_pal(query$seurat_clusters)
 ```
 
-```{r echo=FALSE,message=FALSE,results="asis"}
+```{r echo=FALSE,message=FALSE,results="asis",fig.height=10,fig.width=10}
 set.seed(12345)
 cat("\n")
 

diff --git a/Scripts/ontology.R b/Scripts/ontology.R
@@ -0,0 +1,44 @@
+# Write <out>/model/<reference_name>/ontology/ontology.csv for one reference.
+# Args: out, reference_name, lab_path, ontology_path, ontology_columns
+# (ontology_columns is one space-separated string).
+library(tidyverse)
+
+set.seed(1234)
+
+args = commandArgs(trailingOnly = TRUE)
+if(length(args) < 5){
+  stop("Usage: ontology.R out reference_name lab_path ontology_path ontology_columns")
+}
+
+out = args[1]
+reference_name = args[2]
+lab_path = args[3]
+ontology_path = args[4]
+ontology_columns = strsplit(args[5], split = ' ')[[1]]
+
+print(out)
+print(lab_path)
+print(reference_name)
+print(ontology_path)
+print(ontology_columns)
+
+#----- SAVE ONTOLOGY -----------------------------------
+
+ontology_dir = file.path(out, 'model', reference_name, 'ontology')
+# The directory may already exist on a re-run; do not warn about it.
+dir.create(ontology_dir, recursive = TRUE, showWarnings = FALSE)
+
+if(length(ontology_columns) == 1 && ontology_columns[1] == 'label'){
+  # Only the 'label' column is requested: build the ontology from the unique labels.
+  lab = data.table::fread(lab_path, header = TRUE)
+  print(lab)
+  ont = data.frame(label = unique(lab$label))
+}else{
+  # Otherwise the table at ontology_path is read and re-written as CSV.
+  ont = data.table::fread(ontology_path)
+}
+print(ont)
+
+data.table::fwrite(ont, file = file.path(ontology_dir, 'ontology.csv'), sep = ',')
+
+#--------------------------------------------------------
diff --git a/Scripts/preprocess.R b/Scripts/preprocess.R
@@ -33,12 +33,9 @@ if(is.na(downsample_per_class)){
   stop("The downsample stratified specified is not a logical value")
 }
 
-ontology_path = args[11]
-ontology_columns = strsplit(args[12], split = ' ')[[1]]
-
 names(query_paths) = query_names
 
-batch_path = args[13]
+batch_path = args[11]
 if(batch_path == 'None'){
   batch_path = NULL
 }
@@ -172,17 +169,5 @@ for(q in query_names){
   data.table::fwrite(tmp, file = paste0(out, '/', q, '/', reference_name, '/expression.csv'), sep = ',')
 }
 
-#----- SAVE BASE ONTOLOGY ------------------------------
-
-if(length(ontology_columns) == 1 & ontology_columns[1] == 'label'){
-
-  dir.create(paste0(out, '/model/', reference_name, '/ontology/'), recursive = T)
-
-  lab = data.frame(label = unique(lab$label))
-
-  data.table::fwrite(lab, 
-                     file = paste0(out, '/model/', reference_name, '/ontology/ontology.csv'),
-                     sep = ',')
-}
+#---------------------------------------------------------
 
-#--------------------------------------------------------
diff --git a/snakefile.annotate b/snakefile.annotate
@@ -73,8 +73,6 @@ rule preprocess:
     min_cells_per_cluster = lambda wildcards:config["references"][wildcards.reference]['min_cells_per_cluster'],
     downsample_value = lambda wildcards:config["references"][wildcards.reference]['downsample']['value'],
     downsample_per_class = lambda wildcards:config["references"][wildcards.reference]['downsample']['stratified'],
-    ontology_path = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_path"],
-    ontology_column = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_column"]
   shell:
     """
     Rscript {params.basedir}/Scripts/preprocess.R \
@@ -88,20 +86,48 @@ rule preprocess:
     {params.min_cells_per_cluster} \
     {params.downsample_value} \
     {params.downsample_per_class} \
-    "{params.ontology_path}" \
-    "{params.ontology_column}" \
     "{params.batch_path}" \
     &> {log} 
     """
 
+
+#----------------------------------------------------
+#  Ontology: Scripts/ontology.R writes ontology.csv for each reference
+#----------------------------------------------------
+
+rule ontology:
+  input:
+     labfile = config['output_dir'] + "/model/{reference}/downsampled_labels.csv"  # forwarded to ontology.R as lab_path
+  output:
+     ontology = config['output_dir'] + "/model/{reference}/ontology/ontology.csv"
+  log:
+     config['output_dir'] + "/model/{reference}/ontology.log"  # captures stdout and stderr of the Rscript call
+  params:
+     basedir = {workflow.basedir},  # NOTE(review): the braces make this a Python set literal - confirm intended
+     out = config['output_dir'],
+     reference_name = "{reference}",
+     ontology_path = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_path"],
+     ontology_column = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_column"]  # ontology.R splits this on spaces
+  shell:
+     """
+      Rscript {params.basedir}/Scripts/ontology.R \
+       {params.out} \
+       {params.reference_name} \
+       {input.labfile} \
+       "{params.ontology_path}" \
+       "{params.ontology_column}" \
+      &> {log}
+     """
+
 #----------------------------------------------------
 #  Consensus
 #----------------------------------------------------
 
 rule consensus:
   input:
     results = expand(config["output_dir"] + "/{{sample}}/{{reference}}/{tool}/{tool}_pred.csv",
-                     tool=tools_to_run)
+                     tool=tools_to_run), 
+    ontology = config['output_dir'] + "/model/{reference}/ontology/ontology.csv"
   output:
     prediction_summary = config['output_dir'] + "/{sample}/{reference}/{consensus_type}/Prediction_Summary_{ontology}.tsv"
   log: 
@@ -116,7 +142,6 @@ rule consensus:
     metrics_file =  lambda wildcards:(f'{config["references"][wildcards.reference]["output_dir_benchmark"]}/'f'{wildcards.reference}/report/metrics_label.csv'),
     CAWPE_mode = config["consensus"]["type"]["CAWPE"]["mode"],
     alpha = config["consensus"]["type"]["CAWPE"]["alpha"],
-    ontology_path = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_path"], 
     ontology_label = "{ontology}",
     accuracy_metric = config["consensus"]["type"]["CAWPE"]["accuracy_metric"]
   shell:
@@ -129,7 +154,7 @@ rule consensus:
     "{params.consensus_type}" \
     "{params.min_agree}" \
     "{params.ontology_label}" \
-    "{params.ontology_path}" \
+    "{input.ontology}" \
     {params.metrics_file} \
     "{params.CAWPE_mode}" \
     "{params.alpha}" \
@@ -150,7 +175,8 @@ def ontology_function(wildcards):
 rule knit_report:
   input: 
     pred = ontology_function,
-    query = lambda wildcards:config['query_datasets'][wildcards.sample]
+    query = lambda wildcards:config['query_datasets'][wildcards.sample],
+    ontology = config['output_dir'] + "/model/{reference}/ontology/ontology.csv"
   output: 
     report_path = config['output_dir'] + '/{sample}/report/{sample}.{consensus_type}.prediction_report.{reference}.' + dt_string + '.html'
   log:
@@ -162,7 +188,6 @@ rule knit_report:
     tools = tools_to_run, 
     consensus_tools = consensus_tools,
     refs = "{reference}",
-    ontology_path = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_path"],
     ontology_columns = lambda wildcards:config["references"][wildcards.reference]["ontology"]["ontology_column"],
     marker_genes = config['marker_genes'],
     cons_type = "{consensus_type}"
@@ -182,7 +207,7 @@ rule knit_report:
                           marker_genes  = '{params.marker_genes}',
                           threads       = '{threads}',
                           query         = '{input.query}',
-                          ontology_path = '{params.ontology_path}',
+                          ontology_path = '{input.ontology}',
                           ontology_columns = '{params.ontology_columns}'),
             output_file = '{output.report_path}')" \
     &> {log}