From 07f8b2fd86558e5d5dc7edd7b7e79cc30a3af97b Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com> Date: Thu, 12 Sep 2024 08:09:20 -0400 Subject: [PATCH] Np loom to h5ad snss2 (#1363) * pin all latest docker version * run with h5ad isntead o floom * run with h5ad isntead o floom * run with h5ad isntead o floom * run with h5ad isntead o floom * new docker * merge h5ad files * new path * redo h5ad merge * add verify task * add verify task * add verify task * no more loom * changing loom to h5ad * try updating all instances of warp tools * Updated pipeline_versions.txt with all pipeline version information * test out markdown * formatting * formatting again * formatting again * changelogs and add SingleNucleusSmartSeq2LoomOutput back in * Updated pipeline_versions.txt with all pipeline version information * changelogs * Updated pipeline_versions.txt with all pipeline version information --------- Co-authored-by: GitHub Action --- pipeline_versions.txt | 16 +-- pipelines/skylab/atac/atac.changelog.md | 5 + pipelines/skylab/atac/atac.wdl | 2 +- .../skylab/multiome/Multiome.changelog.md | 4 + pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.changelog.md | 4 + pipelines/skylab/optimus/Optimus.wdl | 4 +- .../skylab/paired_tag/PairedTag.changelog.md | 4 + pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- .../skylab/slideseq/SlideSeq.changelog.md | 5 + pipelines/skylab/slideseq/SlideSeq.wdl | 4 +- .../MultiSampleSmartSeq2.changelog.md | 5 + .../MultiSampleSmartSeq2.wdl | 2 +- ...iSampleSmartSeq2SingleNucleus.changelog.md | 5 + .../MultiSampleSmartSeq2SingleNucleus.wdl | 28 ++-- .../SmartSeq2SingleSample.changelog.md | 5 + .../SmartSeq2SingleSample.wdl | 2 +- tasks/skylab/FastqProcessing.wdl | 2 +- tasks/skylab/H5adUtils.wdl | 134 ++++++++++++++++++ tasks/skylab/LoomUtils.wdl | 7 +- tasks/skylab/Metrics.wdl | 4 +- ...erifyMultiSampleSmartSeq2SingleNucleus.wdl | 10 +- 
.../TestMultiSampleSmartSeq2SingleNucleus.wdl | 10 +- .../README.md | 46 +++--- 24 files changed, 238 insertions(+), 74 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index c916cdbb41..e675c060e4 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,15 +1,15 @@ Pipeline Name Version Date of Last Commit -MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 -MultiSampleSmartSeq2 2.2.21 2023-04-19 -PairedTag 1.6.0 2024-08-02 -Optimus 7.6.0 2024-08-06 -atac 2.3.0 2024-08-29 +MultiSampleSmartSeq2SingleNucleus 2.0.0 2024-09-11 +MultiSampleSmartSeq2 2.2.22 2024-09-11 +PairedTag 1.6.1 2024-09-11 +Optimus 7.6.1 2024-09-11 +atac 2.3.1 2024-09-11 snm3C 4.0.4 2024-08-06 -SmartSeq2SingleSample 5.1.20 2023-04-19 -Multiome 5.6.0 2024-08-02 +SmartSeq2SingleSample 5.1.21 2024-09-11 +Multiome 5.6.1 2024-09-11 scATAC 1.3.2 2023-08-03 BuildIndices 3.0.0 2023-12-06 -SlideSeq 3.4.0 2024-08-06 +SlideSeq 3.4.1 2024-09-11 BuildCembaReferences 1.0.0 2020-11-15 CEMBA 1.1.7 2024-09-06 GDCWholeGenomeSomaticSingleSample 1.3.3 2024-09-06 diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 544fb8ea50..d64620354a 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.3.1 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the atac pipeline + # 2.3.0 2024-08-29 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index b207e393fb..8918a8d8ad 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -46,7 +46,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.3.0" + String pipeline_version = "2.3.1" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 98904837e8..843e4baced 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,7 @@ +# 5.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the Multiome pipeline + # 5.6.0 2024-08-02 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index d647e82944..3979a4fa7d 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.6.0" + String pipeline_version = "5.6.1" input { diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index f64f9eb4bc..9a51ef009d 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,7 @@ +# 7.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the Optimus pipeline + # 7.6.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 975439c9f3..eb03dfb30a 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -71,7 +71,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "7.6.0" + String pipeline_version = "7.6.1" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays @@ -91,7 +91,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String warp_tools_docker_2_2_0 = "warp-tools:2.2.0" + String warp_tools_docker_2_2_0 = "warp-tools:2.3.0" String star_merge_docker = "star-merge-npz:1.2" #TODO how do we handle these? diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index ba4a05376c..7ad1571702 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 1.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the PairedTag pipeline + # 1.6.0 2024-08-02 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 4206f4fabb..c401d25928 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.6.0" + String pipeline_version = "1.6.1" input { diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 4c49b67467..dbbe866338 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 3.4.1 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the SlideSeq pipeline + # 3.4.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index c449818881..0ca5d4edc7 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow SlideSeq { - String pipeline_version = "3.4.0" + String pipeline_version = "3.4.1" input { Array[File] r1_fastq @@ -48,7 +48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker_2_2_0 = "warp-tools:2.2.0" + String warp_tools_docker_2_2_0 = "warp-tools:2.3.0" String star_merge_docker = "star-merge-npz:1.2" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md index 
5bc4212b9d..e5960f2ddf 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md @@ -1,3 +1,8 @@ +# 2.2.22 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the MultiSmartSeq2 pipeline + # 2.2.21 2023-04-19 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl index 91c9d4f882..0717f23b78 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2 { Boolean paired_end } # Version of this pipeline - String pipeline_version = "2.2.21" + String pipeline_version = "2.2.22" if (false) { String? none = "None" diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 67d38b59b2..f6556b3bbb 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,8 @@ +# 2.0.0 +2024-09-11 (Dat of Last Commit) + +* Added h5ad as a format option for the cell by gene matrix output. 
The h5ad has the same layers and global attributes (unstructured data in h5ad) as the previous Loom output + # 1.4.2 2024-08-25-02 (Dat of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 51d6b6c212..38ae12ff23 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -5,7 +5,7 @@ import "../../../tasks/skylab/TrimAdapters.wdl" as TrimAdapters import "../../../tasks/skylab/StarAlign.wdl" as StarAlign import "../../../tasks/skylab/Picard.wdl" as Picard import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments -import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils +import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/broad/Utilities.wdl" as utils workflow MultiSampleSmartSeq2SingleNucleus { @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { } # Version of this pipeline - String pipeline_version = "1.4.2" + String pipeline_version = "2.0.0" if (false) { String? 
none = "None" @@ -129,7 +129,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { annotation_gtf = annotations_gtf } - call LoomUtils.SingleNucleusSmartSeq2LoomOutput as LoomOutput { + call H5adUtils.SingleNucleusSmartSeq2H5adOutput as H5adOutput { input: input_ids = input_ids, input_names = input_names, @@ -144,28 +144,22 @@ workflow MultiSampleSmartSeq2SingleNucleus { annotation_introns_added_gtf = annotations_gtf } - ### Aggregate the Loom Files Directly ### - call LoomUtils.AggregateSmartSeq2Loom as AggregateLoom { + ### Aggregate the H5ad Files Directly ### + call H5adUtils.AggregateSmartSeq2H5ad as AggregateH5ad { input: - loom_input = LoomOutput.loom_output, - batch_id = batch_id, - batch_name = batch_name, - project_id = if defined(project_id) then select_first([project_id])[0] else none, - project_name = if defined(project_name) then select_first([project_name])[0] else none, - library = if defined(library) then select_first([library])[0] else none, - species = if defined(species) then select_first([species])[0] else none, - organ = if defined(organ) then select_first([organ])[0] else none, - pipeline_version = "MultiSampleSmartSeq2SingleNucleus_v~{pipeline_version}" + h5ad_input = H5adOutput.h5ad_output, + pipeline_version = pipeline_version, + batch_id = batch_id } ### Pipeline output ### output { - # loom output, exon/intron count tsv files and the aligned bam files - File loom_output = AggregateLoom.loom_output_file + # h5ad output, exon/intron count tsv files and the aligned bam files + File h5ad_output = AggregateH5ad.h5ad_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version - Array[File] exon_intron_count_files = LoomOutput.exon_intron_counts + Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts Array[File] bam_files = RemoveDuplicatesFromBam.output_bam String pipeline_version_out = pipeline_version } diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md 
b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md index 421964d455..b706b6a96e 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md @@ -1,3 +1,8 @@ +# 5.1.21 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the SmartSeq2SingleSample pipeline + # 5.1.20 2023-04-19 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl index efec1c4163..b9df384859 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -36,7 +36,7 @@ workflow SmartSeq2SingleSample { } # version of this pipeline - String pipeline_version = "5.1.20" + String pipeline_version = "5.1.21" parameter_meta { genome_ref_fasta: "Genome reference in fasta format" diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 20a7169d29..ea7363b738 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index f7cb7338b3..890b044680 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -552,4 +552,138 @@ task SingleNucleusSlideseqH5adOutput { output { File h5ad_output = "~{input_id}.h5ad" } +} + +task SingleNucleusSmartSeq2H5adOutput { + input { + #runtime values + String docker = 
"us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" + + Array[File] alignment_summary_metrics + Array[File] dedup_metrics + Array[File] gc_bias_summary_metrics + + # introns counts + Array[File] introns_counts + # exons counts + Array[File] exons_counts + # annotation file + File annotation_introns_added_gtf + # name of the sample + Array[String] input_ids + Array[String]? input_names + String? input_id_metadata_field + String? input_name_metadata_field + + String pipeline_version + Int preemptible = 3 + Int disk = 200 + Int machine_mem_mb = 8000 + Int cpu = 4 + } + + meta { + description: "This task will convert output from the SmartSeq2SingleNucleus pipeline into a loom file. Contrary to the SmartSeq2 single cell where there is only RSEM counts, here we have intronic and exonic counts per gene name" + } + + parameter_meta { + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command <<< + set -euo pipefail + + declare -a introns_counts_files=(~{sep=' ' introns_counts}) + declare -a exons_counts_files=(~{sep=' ' exons_counts}) + declare -a output_prefix=(~{sep=' ' input_ids}) + declare -a alignment_summary_metrics_list=(~{sep=' 'alignment_summary_metrics}) + declare -a dedup_metrics_list=(~{sep=' 'dedup_metrics}) + declare -a gc_bias_summary_metrics_list=(~{sep=' 'gc_bias_summary_metrics}) + + for (( i=0; i<${#introns_counts_files[@]}; ++i)); + do + # creates a table with gene_id, gene_name, intron and exon counts + echo "Running create_snss2_counts_csv." + python /warptools/scripts/create_snss2_counts_csv.py \ + --in-gtf ~{annotation_introns_added_gtf} \ + --intron-counts ${introns_counts_files[$i]} \ + --exon-counts ${exons_counts_files[$i]} \ + -o "${output_prefix[$i]}.exon_intron_counts.tsv" + echo "Success create_snss2_counts_csv." 
+ + # groups the QC file into one file + echo "Running GroupQCs" + GroupQCs -f "${alignment_summary_metrics_list[$i]}" "${dedup_metrics_list[$i]}" "${gc_bias_summary_metrics_list[$i]}" \ + -t Picard -o "${output_prefix[$i]}.Picard_group" + echo "Success GroupQCs" + + # create the loom file + echo "Running create_h5ad_snss2." + python3 /warptools/scripts/create_h5ad_snss2.py \ + --qc_files "${output_prefix[$i]}.Picard_group.csv" \ + --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ + --output_h5ad_path "${output_prefix[$i]}" \ + --input_id ${output_prefix[$i]} \ + ~{"--input_id_metadata_field " + input_id_metadata_field} \ + ~{"--input_name_metadata_field " + input_name_metadata_field} \ + --pipeline_version ~{pipeline_version} + + echo "Success create_h5ad_snss2" + done; + >>> + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: preemptible + } + + output { + Array[File] h5ad_output = glob("*.h5ad") + Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv") + } +} + +task AggregateSmartSeq2H5ad { + input { + Array[File] h5ad_input + String batch_id + String pipeline_version + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" + Int disk = 200 + Int machine_mem_mb = 4000 + Int cpu = 1 + } + + meta { + description: "aggregate the H5AD output" + } + + command { + set -e + + # Merge the h5ad files + python3 /warptools/scripts/ss2_h5ad_merge.py \ + --input-h5ad-files ~{sep=' ' h5ad_input} \ + --output-h5ad-file "~{batch_id}.h5ad" \ + --batch_id ~{batch_id} \ + --pipeline_version ~{pipeline_version} + } + + output { + File h5ad_output_file = "~{batch_id}.h5ad" + } + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: 3 + maxRetries: 1 + } } \ No newline at end of file diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 
145b2ebc77..1c13ad7994 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -62,7 +62,7 @@ task OptimusLoomGeneration { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" # name of the sample String input_id # user provided id @@ -214,11 +214,12 @@ task AggregateSmartSeq2Loom { } + task SingleNucleusOptimusLoomOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" # name of the sample String input_id # user provided id @@ -401,7 +402,7 @@ task SlideSeqLoomOutput { String input_id String pipeline_version - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int disk_size_gb = 200 Int memory_mb = 18000 Int cpu = 4 diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl index 1523712912..2f759071dc 100644 --- a/tasks/skylab/Metrics.wdl +++ b/tasks/skylab/Metrics.wdl @@ -165,7 +165,7 @@ task CalculateUMIsMetrics { # runtime values # Did not update docker image as this task uses loom which does not play nice with the changes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int machine_mem_mb = 16000 Int cpu = 8 Int disk = ceil(size(bam_input, "Gi") * 4) @@ -240,7 +240,7 @@ task FastqMetricsSlideSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3) + 50 diff --git a/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl b/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl index a5d9b8618e..8d7917f60b 100644 --- a/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl +++ 
b/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl @@ -6,8 +6,8 @@ workflow VerifyMultiSampleSmartSeq2SingleNucleus { input { Array[File] truth_bams Array[File] test_bams - File truth_loom - File test_loom + File truth_h5ad + File test_h5ad Boolean? done } @@ -21,10 +21,10 @@ workflow VerifyMultiSampleSmartSeq2SingleNucleus { } } - call VerifyTasks.CompareLooms as CompareLooms { + call VerifyTasks.CompareH5adFilesGEX as CompareH5adFiles { input: - test_loom = test_loom, - truth_loom = truth_loom + test_h5ad = test_h5ad, + truth_h5ad = truth_h5ad } output{} diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index 228b6b1f41..3c98269a4b 100644 --- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -68,7 +68,7 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { # Collect all of the pipeline outputs into single Array[String] Array[String] pipeline_outputs = flatten([ [ # File outputs - MultiSampleSmartSeq2SingleNucleus.loom_output, + MultiSampleSmartSeq2SingleNucleus.h5ad_output, ], # Array[File] outputs MultiSampleSmartSeq2SingleNucleus.bam_files, @@ -106,9 +106,9 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { results_path = results_path, truth_path = truth_path } - call Utilities.GetValidationInputs as GetLoom { + call Utilities.GetValidationInputs as GetH5ad { input: - input_file = MultiSampleSmartSeq2SingleNucleus.loom_output, + input_file = MultiSampleSmartSeq2SingleNucleus.h5ad_output, results_path = results_path, truth_path = truth_path } @@ -117,8 +117,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { input: truth_bams = GetBams.truth_files, test_bams = GetBams.results_files, - truth_loom = GetLoom.truth_file, - test_loom = GetLoom.results_file, + truth_h5ad = GetH5ad.truth_file, + test_h5ad = GetH5ad.results_file, done = CopyToTestResults.done } } diff --git 
a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index ea1f81efae..0838117702 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -15,7 +15,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README The Smart-seq2 Single Nucleus Multi-Sample (Multi-snSS2) pipeline was developed in collaboration with the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN) to process single-nucleus RNAseq (snRNAseq) data generated by [Smart-seq2 assays](https://www.nature.com/articles/nmeth.2639). The pipeline's workflow is written in WDL, is freely available in the [WARP repository](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl) on GitHub, and can be run by any compliant WDL runner (e.g. [Crowmell](https://github.com/broadinstitute/cromwell)). -The pipeline is designed to process snRNA-seq data from multiple cells. Overall, the workflow trims paired-end FASTQ files, aligns reads to the genome using a modified GTF, [counts intronic and exonic reads](#6-creating-the-loom-cell-by-gene-matrix), and calculates quality control metrics. +The pipeline is designed to process snRNA-seq data from multiple cells. Overall, the workflow trims paired-end FASTQ files, aligns reads to the genome using a modified GTF, [counts intronic and exonic reads](#6-creating-the-h5ad-cell-by-gene-matrix), and calculates quality control metrics. The pipeline has been scientifically validated by the BRAIN Institute. Read more in the [validation section](#validation). 
@@ -27,7 +27,7 @@ You can run the [Smart-seq2 Single Nucleus Multi-Sample workflow](https://github ## Quick start table | Pipeline features | Description | Source | -|-------------------|---------------------------------------------------------------|-----------------------| +|---|---|---| | Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) | Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | | Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | @@ -94,7 +94,7 @@ Overall, the Multi-snSS2 workflow: 1. Removes duplicate reads. 1. Calculates metrics. 1. Quantifies gene counts. -1. Merges exon counts, intron counts, and metrics into a Loom-formatted matrix. +1. Merges exon counts, intron counts, and metrics into a h5ad-formatted matrix. The tools each task employs in the Multi-snSS2 workflow are detailed in the table below. @@ -102,15 +102,15 @@ To see specific tool parameters, select the task WDL link in the table; then vie | Task name and WDL link | Tool | Software | Description | | --- | --- | --- | --- | -| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | +| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | | [StarAlign.STARGenomeRefVersion](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | --- | Bash | Reads the `tar_star_reference` file to obtain the genomic reference source, build version, and annotation version. 
| | [TrimAdapters.TrimAdapters](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/TrimAdapters.wdl) | [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) | [ea-utils](https://github.com/ExpressionAnalysis/ea-utils) | Trims adapter sequences from the FASTQ inputs | | [StarAlign.StarAlignFastqMultisample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | STAR | [STAR](https://github.com/alexdobin/STAR) | Aligns reads to the genome. | | [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. | | [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | | [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | -| [LoomUtils.SingleNucleusSmartSeq2LoomOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/LoomUtils.wdl) | Custom script: [ss2_loom_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_loom_merge.py) | Python 3 | Creates the matrix files (Loom format) for each sample. 
| -| [LoomUtils.AggregateSmartSeq2Loom](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/LoomUtils.wdl) | Custom script: [ss2_loom_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_loom_merge.py) | Python 3 | Aggregates the matrix files (Loom format) for each sample to produce one final Loom output. | +| [H5adUtils.SingleNucleusSmartSeq2H5adOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) | Python 3 | Creates the matrix files (h5ad format) for each sample. | +| [H5adUtils.AggregateSmartSeq2H5ad](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [ss2_h5ad_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_h5ad_merge.py) | Python 3 | Aggregates the matrix files (h5ad format) for each sample to produce one final h5ad output. | #### 1. Trimming adapters The TrimAdapters task uses the adapter list reference file to run the [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) tool. This tool identifies the adapters in the input FASTQ files and performs clipping by using a subsampling parameter of 200,000 reads. The task outputs the trimmed FASTQ files which are then used for alignment. @@ -122,7 +122,7 @@ The StarAlignFastq task runs the STAR aligner on the trimmed FASTQ files. The ST The RemoveDuplicatesFromBam task removes multi-mapped reads, optical duplicates, and PCR duplicates from the aligned BAM. It then adds readgroup information and creates a new, coordinate-sorted aligned BAM output. #### 4. Collecting metrics -The CollectMultipleMetrics task uses the Picard tool CollectMultipleMetrics to perform QC on the deduplicated BAM file. These metrics are copied to the final cell-by-gene matrix output (Loom file). 
A detailed list of these metrics can be found in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). +The CollectMultipleMetrics task uses the Picard tool CollectMultipleMetrics to perform QC on the deduplicated BAM file. These metrics are copied to the final cell-by-gene matrix output (h5ad file). A detailed list of these metrics can be found in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). #### 5. Counting genes The CountAlignments task uses the featureCounts package to count introns and exons. First, the featureCounts tool counts intronic alignments in the deduplicated BAM using a custom GTF with annotated introns. The tool flags intronic alignments if they overlap an annotated intron by a minimum of 3 bp. @@ -131,28 +131,28 @@ Next, following pipeline processes established by the BICCN Whole Mouse Brain Wo Lastly, featureCounts uses the intermediate BAM with junctions removed to count exons. The final outputs of this step include a cell-by-gene matrix of intronic counts, a cell-by-gene matrix of exonic counts, and summary metrics for the different count types. -#### 6. Creating the Loom cell by gene matrix -The LoomUtils task combines the Picard metrics (alignment_summary_metrics, deduplication metrics, and the G/C bias summary metrics) with the featureCount exon and intron counts to create a Loom formatted cell-by-gene count matrix. +#### 6. Creating the h5ad cell by gene matrix +The H5adUtils task combines the Picard metrics (alignment_summary_metrics, deduplication metrics, and the G/C bias summary metrics) with the featureCount exon and intron counts to create an h5ad formatted cell-by-gene count matrix. Read full details for all the metrics in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). -The cell-by-gene matrix can be examined using [Loompy software](https://linnarssonlab.org/loompy/index.html). Exonic counts are stored in the main Loom matrix which is unnamed by default. 
They are the default return value of the `loompy.connect()` command. Intronic counts are stored in the Loom as an additional layer which is named `intron_counts`. +The cell-by-gene matrix can be examined using [anndata software](https://anndata.readthedocs.io/en/latest/). Exonic counts are stored in the main h5ad data matrix, which is accessed through the `X` attribute of the AnnData object returned by `anndata.read_h5ad()`. Intronic counts are stored in the h5ad as an additional layer which is named `intron_counts`. -You can also access exonic and intronic counts using Loompy's `layers()` method. For example, `loompy.connect.layers[“”]` will return the exonic counts from the output Loom file. Similarly, `loompy.connect.layers[“intron_counts”]` will return the intronic counts from the output Loom. +You can also access exonic and intronic counts using the AnnData `X` and `layers` attributes. For example, `adata.X` will return the exonic counts from the output h5ad file. Similarly, `adata.layers[“intron_counts”]` will return the intronic counts from the output h5ad. Whole gene counts (which include both intronic and exonic counts) can be accessed by adding the intronic and exonic counts together. -Below is example Loompy code for accessing the Loom's exonic, intronic, and whole gene counts. +Below is example anndata code for accessing the h5ad's exonic, intronic, and whole gene counts. 
```python -import loompy -ds = loompy.connect("/PATH/TO/File.loom") -count_exons = ds[:,:] #geneXcell table for the exonic read counts -count_introns = ds.layer["intron_counts"] #geneXcell table for the intronic read counts -gene_counts = count_exons + count_introns +import anndata +adata = anndata.read_h5ad("/PATH/TO/File.h5ad") +count_exons = adata.X # cell x gene table for the exonic read counts +count_introns = adata.layers["intron_counts"] # cell x gene table for the intronic read counts ``` +If you would like to get the counts for both introns and exons, you can sum the two matrices together (for example, `gene_counts = count_exons + count_introns`). -To read more about the Loom file format and use of layers, see the [Loompy documentation](https://linnarssonlab.org/loompy/index.html). +To read more about the h5ad file format and use of layers, see the [h5ad documentation](https://anndata.readthedocs.io/en/latest/). #### 7. Outputs @@ -160,16 +160,14 @@ The table below details the final outputs of the Multi-snSS2 workflow. | Output variable name | Description | Type | | --- | --- | --- | -| loom_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | Loom | -| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT]| +| h5ad_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | h5ad | +| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT]| | bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM]| | pipeline_version_out | Version of the processing pipeline run on this data. | String | -The Loom matrix is the default output. 
See the [create_loom_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_loom_snss2.py) script for the detailed code. This matrix contains the count matrices, as well as the gene and cell metrics detailed in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). +The h5ad matrix is the default output. See the [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) script for the detailed code. This matrix contains the count matrices, as well as the gene and cell metrics detailed in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). -To facilitate downstream analysis, the output Loom file contains both gene names and gene IDs. - -The output Loom matrix can be converted to an H5AD file using a [custom script](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/loom_to_h5ad.py) available in the [warp-tools GitHub repository](https://github.com/broadinstitute/warp-tools). +To facilitate downstream analysis, the output h5ad file contains both gene names and gene IDs. ## Validation The Multi-snSS2 pipeline was scientifically validated by the BRAIN Initiatives Cell Census Network (BICCN) 2.0 Whole Mouse Brain Working Group.