From b50dc6f4dccb574327044aa976a35182f8e0e6d0 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 7 Jul 2023 15:58:51 -0400 Subject: [PATCH 01/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index c3a59df19c..5cc5b86494 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -319,6 +319,7 @@ task STARsoloFastq { --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN \ --soloBarcodeReadLength 0 + --soloCellReadStats Standard fi STAR \ @@ -338,6 +339,7 @@ task STARsoloFastq { --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN \ --soloBarcodeReadLength 0 + --soloCellReadStats Standard touch barcodes_sn_rna.tsv touch features_sn_rna.tsv From f6f68ccf04fdea7b60e6674b3eec12e222959467 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 10 Jul 2023 09:11:52 -0400 Subject: [PATCH 02/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 5cc5b86494..c33d98ca5a 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -318,7 +318,7 @@ task STARsoloFastq { --soloUMIdedup 1MM_Directional_UMItools \ --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN \ - --soloBarcodeReadLength 0 + --soloBarcodeReadLength 0 \ --soloCellReadStats Standard fi @@ -338,7 +338,7 @@ task STARsoloFastq { --soloUMIdedup 1MM_Directional_UMItools \ --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN \ - --soloBarcodeReadLength 0 + --soloBarcodeReadLength 0 \ --soloCellReadStats Standard touch barcodes_sn_rna.tsv @@ -369,6 +369,8 @@ task STARsoloFastq { echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. fi mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam + # print contents + ls >>> From a46feeb8f734386d7d37353c1e4d3d1dd1d29670 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 10 Jul 2023 10:31:21 -0400 Subject: [PATCH 03/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index c33d98ca5a..56d61420c4 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -371,6 +371,8 @@ task STARsoloFastq { mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam # print contents ls + ls Solo.out/GeneFull_Ex50pAS/raw/ + ls Solo.out/GeneFull_Ex50pAS/ >>> From 766951961f9ad2f5626ac116b435729872770802 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 14 Jul 2023 14:00:14 -0400 Subject: [PATCH 04/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 56d61420c4..ef1ad9310f 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -344,12 +344,20 @@ task STARsoloFastq { touch barcodes_sn_rna.tsv touch features_sn_rna.tsv touch matrix_sn_rna.mtx + touch CellReads_sn_rna.stats + touch Features_sn_rna.stats + touch Summary_sn_rna.stats + touch UMIperCellSorted_sn_rna.txt if [[ "~{counting_mode}" == "sc_rna" ]] then mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv mv "Solo.out/Gene/raw/matrix.mtx" matrix.mtx + mv "Solo.out/Gene/CellReads.stats" CellReads.stats + mv "Solo.out/Gene/Features.stats" Features.stats + mv "Solo.out/Gene/Summary.csv" Summary.csv + mv "Solo.out/Gene/UMIperCellSorted.txt" UMIperCellSorted.txt elif [[ "~{counting_mode}" == "sn_rna" ]] then if ! [[ ~{count_exons} ]] @@ -357,22 +365,31 @@ task STARsoloFastq { mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx + mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats + mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats + mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv + mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt else mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx + mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats + mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats + mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv + mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt mv "Solo.out/Gene/raw/barcodes.tsv" barcodes_sn_rna.tsv mv "Solo.out/Gene/raw/features.tsv" features_sn_rna.tsv mv "Solo.out/Gene/raw/matrix.mtx" matrix_sn_rna.mtx + mv "Solo.out/Gene/CellReads.stats" CellReads_sn_rna.stats + mv "Solo.out/Gene/Features.stats" Features_sn_rna.stats + mv "Solo.out/Gene/Summary.csv" Summary_sn_rna.csv + mv "Solo.out/Gene/UMIperCellSorted.txt" UMIperCellSorted_sn_rna.txt fi else echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. fi mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam - # print contents - ls - ls Solo.out/GeneFull_Ex50pAS/raw/ - ls Solo.out/GeneFull_Ex50pAS/ + tar -zcvf ~{output_bam_basename}.star_metrics.tar *.stats *.txt *.csv >>> @@ -395,6 +412,7 @@ task STARsoloFastq { File barcodes_sn_rna = "barcodes_sn_rna.tsv" File features_sn_rna = "features_sn_rna.tsv" File matrix_sn_rna = "matrix_sn_rna.mtx" + File aligner_metrics = "~{output_bam_basename}.star_metrics.tar" } } From a38dd02bc3c37666194c7f008a385ac7b5c8ed6d Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 14 Jul 2023 20:12:05 -0400 Subject: [PATCH 05/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index ef1ad9310f..62fff3d36a 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -346,7 +346,7 @@ task STARsoloFastq { touch matrix_sn_rna.mtx touch CellReads_sn_rna.stats touch Features_sn_rna.stats - touch Summary_sn_rna.stats + touch Summary_sn_rna.csv touch UMIperCellSorted_sn_rna.txt if [[ "~{counting_mode}" == "sc_rna" ]] @@ -389,7 +389,7 @@ task STARsoloFastq { echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. fi mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam - tar -zcvf ~{output_bam_basename}.star_metrics.tar *.stats *.txt *.csv + #tar -zcvf ~{output_bam_basename}.star_metrics.tar *.stats *.txt *.csv >>> @@ -412,7 +412,15 @@ task STARsoloFastq { File barcodes_sn_rna = "barcodes_sn_rna.tsv" File features_sn_rna = "features_sn_rna.tsv" File matrix_sn_rna = "matrix_sn_rna.mtx" - File aligner_metrics = "~{output_bam_basename}.star_metrics.tar" + File CellReads = "CellReads.stats" + File Features = "Features.stats" + File summary = "Summary.csv" + File umipercell = "UMIperCellSorted.txt" + File CellReads_sn_rna = "CellReads_sn_rna.stats" + File Features_sn_rna = "Features_sn_rna.stats" + File summary_sn_rna = "Summary_sn_rna.csv" + File umipercell_sn_rna = "UMIperCellSorted_sn_rna.txt" + #File aligner_metrics = "~{output_bam_basename}.star_metrics.tar" } } @@ -422,6 +430,7 @@ task MergeStarOutput { Array[File] barcodes Array[File] features Array[File] matrix + String input_id #runtime values From 6fa2a0226870e377cd4f8cf942b22b5e21ae4572 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 09:11:58 -0400 Subject: [PATCH 06/24] fixed duplicate variable name --- pipelines/skylab/optimus/Optimus.wdl | 1 + tasks/skylab/StarAlign.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index e6b41fe69e..5e62419bf1 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -247,5 +247,6 @@ workflow Optimus { File? picard_metrics = DropseqMetrics.metric_output # h5ad File h5ad_output_file = final_h5ad_output + # File aligner_metrics = } } diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 62fff3d36a..91f790727a 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -413,11 +413,11 @@ task STARsoloFastq { File features_sn_rna = "features_sn_rna.tsv" File matrix_sn_rna = "matrix_sn_rna.mtx" File CellReads = "CellReads.stats" - File Features = "Features.stats" + File align_features = "Features.stats" File summary = "Summary.csv" File umipercell = "UMIperCellSorted.txt" File CellReads_sn_rna = "CellReads_sn_rna.stats" - File Features_sn_rna = "Features_sn_rna.stats" + File align_features_sn_rna = "Features_sn_rna.stats" File summary_sn_rna = "Summary_sn_rna.csv" File umipercell_sn_rna = "UMIperCellSorted_sn_rna.txt" #File aligner_metrics = "~{output_bam_basename}.star_metrics.tar" From 3414772d8a5e7423e22a6a9ac90419a4c393bdba Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 10:52:44 -0400 Subject: [PATCH 07/24] Gathering cell_reads --- pipelines/skylab/optimus/Optimus.wdl | 1 + tasks/skylab/StarAlign.wdl | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 5e62419bf1..25c134485a 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -209,6 +209,7 @@ workflow Optimus { barcodes = STARsoloFastq.barcodes_sn_rna, features = STARsoloFastq.features_sn_rna, matrix = STARsoloFastq.matrix_sn_rna, + cell_reads = STARsoloFastq.cell_reads_sn_rna input_id = input_id } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 91f790727a..9747b0be44 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -412,11 +412,11 @@ task STARsoloFastq { File barcodes_sn_rna = "barcodes_sn_rna.tsv" File features_sn_rna = "features_sn_rna.tsv" File matrix_sn_rna = "matrix_sn_rna.mtx" - File CellReads = "CellReads.stats" + File cell_reads = "CellReads.stats" File align_features = "Features.stats" File summary = "Summary.csv" File umipercell = "UMIperCellSorted.txt" - File CellReads_sn_rna = "CellReads_sn_rna.stats" + File cell_reads_sn_rna = "CellReads_sn_rna.stats" File align_features_sn_rna = "Features_sn_rna.stats" File summary_sn_rna = "Summary_sn_rna.csv" File umipercell_sn_rna = "UMIperCellSorted_sn_rna.txt" @@ -430,6 +430,7 @@ task MergeStarOutput { Array[File] barcodes Array[File] features Array[File] matrix + Array[File] cell_reads String input_id @@ -457,6 +458,10 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) + declare -a cell_reads_files=(~(sep=' ' cell_reads)) + + cat $(cell_reads_files[@]) > cell_reads.txt + # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /usr/gitc/create-merged-npz-output.py \ @@ -479,6 +484,7 @@ task MergeStarOutput { File row_index = "~{input_id}_sparse_counts_row_index.npy" File col_index = "~{input_id}_sparse_counts_col_index.npy" File sparse_counts = "~{input_id}_sparse_counts.npz" + File cell_reads_out = "~{input_id}_cell_reads.txt" } } From ff95806c52be0d6e1ab5d90602810b2732c7628f Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 10:53:58 -0400 Subject: [PATCH 08/24] Update Optimus.wdl --- pipelines/skylab/optimus/Optimus.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 25c134485a..eb26fc7e15 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -173,6 +173,7 @@ workflow Optimus { barcodes = STARsoloFastq.barcodes, features = STARsoloFastq.features, matrix = STARsoloFastq.matrix, + cell_reads = STARsoloFastq.cell_reads input_id = input_id } if (counting_mode == "sc_rna"){ From 1619773c497db6bca30af07f297f6925c38be3da Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 10:54:58 -0400 Subject: [PATCH 09/24] added commas for cell_reads --- pipelines/skylab/optimus/Optimus.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index eb26fc7e15..c704949474 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -173,7 +173,7 @@ workflow Optimus { barcodes = STARsoloFastq.barcodes, features = STARsoloFastq.features, matrix = STARsoloFastq.matrix, - cell_reads = STARsoloFastq.cell_reads + cell_reads = STARsoloFastq.cell_reads, input_id = input_id } if (counting_mode == "sc_rna"){ @@ -210,7 +210,7 @@ workflow Optimus { barcodes = STARsoloFastq.barcodes_sn_rna, features = STARsoloFastq.features_sn_rna, matrix = STARsoloFastq.matrix_sn_rna, - cell_reads = STARsoloFastq.cell_reads_sn_rna + cell_reads = STARsoloFastq.cell_reads_sn_rna, input_id = input_id } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ From 351fdf403a4b094c32732179b72f044c65cfa2ec Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 10:58:18 -0400 Subject: [PATCH 10/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 9747b0be44..58518b4e90 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -458,7 +458,7 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) - declare -a cell_reads_files=(~(sep=' ' cell_reads)) + declare -a cell_reads_files=(~{sep=' ' cell_reads}) cat $(cell_reads_files[@]) > cell_reads.txt From 017c839014c0dda47c6a23efaca03fc4d3d3a66d Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 11:40:04 -0400 Subject: [PATCH 11/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 58518b4e90..b8a002d1d4 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -460,7 +460,7 @@ task MergeStarOutput { declare -a matrix_files=(~{sep=' ' matrix}) declare -a cell_reads_files=(~{sep=' ' cell_reads}) - cat $(cell_reads_files[@]) > cell_reads.txt + cat $(cell_reads_files[@]) > ~{input_id}_cell_reads.txt # create the compressed raw count matrix with the counts, gene names and the barcodes From 32b27ecb9a660828b49b4e64273cd1ae4ecddd45 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 11:54:12 -0400 Subject: [PATCH 12/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index b8a002d1d4..a52b63e365 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -460,7 +460,7 @@ task MergeStarOutput { declare -a matrix_files=(~{sep=' ' matrix}) declare -a cell_reads_files=(~{sep=' ' cell_reads}) - cat $(cell_reads_files[@]) > ~{input_id}_cell_reads.txt + cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt # create the compressed raw count matrix with the counts, gene names and the barcodes From 16d005fd8c0da65c5b8f896f0ad4601ae9fabb51 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 12:44:32 -0400 Subject: [PATCH 13/24] added aligner metrics output to Optimus --- pipelines/skylab/optimus/Optimus.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index c704949474..75dcd88e8d 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -247,8 +247,8 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? picard_metrics = DropseqMetrics.metric_output + File aligner_metrics = MergeStarOutputs.cell_reads_out # h5ad File h5ad_output_file = final_h5ad_output - # File aligner_metrics = } } From cacadd907bfb8873bb6b54623468c326f7ec96b1 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 15:40:25 -0400 Subject: [PATCH 14/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index a52b63e365..80e9edf569 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -430,8 +430,8 @@ task MergeStarOutput { Array[File] barcodes Array[File] features Array[File] matrix - Array[File] cell_reads - + Array[File]? cell_reads + String input_id #runtime values @@ -458,10 +458,15 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) - declare -a cell_reads_files=(~{sep=' ' cell_reads}) - - cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt + # optional cell_reads + + ~{if defined(cell_reads) then "declare -a cell_reads_files=(~{sep= ' ' cell_reads})" else ""} + ~{if defined(cell_reads) then "cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt" else ""} + + + #cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt + # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /usr/gitc/create-merged-npz-output.py \ @@ -484,7 +489,7 @@ task MergeStarOutput { File row_index = "~{input_id}_sparse_counts_row_index.npy" File col_index = "~{input_id}_sparse_counts_col_index.npy" File sparse_counts = "~{input_id}_sparse_counts.npz" - File cell_reads_out = "~{input_id}_cell_reads.txt" + File? cell_reads_out = "~{input_id}_cell_reads.txt" } } From 8143147562578d75bbd1c1d0c30898c330c3a7b9 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 15:44:30 -0400 Subject: [PATCH 15/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 80e9edf569..8097a2b5f1 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -459,9 +459,11 @@ task MergeStarOutput { declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) # optional cell_reads - - ~{if defined(cell_reads) then "declare -a cell_reads_files=(~{sep= ' ' cell_reads})" else ""} - ~{if defined(cell_reads) then "cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt" else ""} + $DECLARE=(declare -a cell_reads_files=(~{sep= ' ' cell_reads})) + $CAT=(cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt) + + ~{if defined(cell_reads) then "$DECLARE" else ""} + ~{if defined(cell_reads) then "$CAT" else ""} From d9196a77603e6a3ec5d9ca9a3b1526526e15e2a3 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 15:51:28 -0400 Subject: [PATCH 16/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 8097a2b5f1..f826fd2292 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -458,17 +458,13 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) - # optional cell_reads - $DECLARE=(declare -a cell_reads_files=(~{sep= ' ' cell_reads})) - $CAT=(cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt) - - ~{if defined(cell_reads) then "$DECLARE" else ""} - ~{if defined(cell_reads) then "$CAT" else ""} - - - #cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt - + $cell_reads=(~{cell_reads}) + + if [ -f "$cell_reads" ]; then + declare -a cell_reads_files=(~{sep= ' ' cell_reads}) + cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt + fi # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /usr/gitc/create-merged-npz-output.py \ From 8b3baabaadb7b2520ca12369bc16471bd7946c39 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 15:53:52 -0400 Subject: [PATCH 17/24] Update Optimus.wdl --- pipelines/skylab/optimus/Optimus.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 75dcd88e8d..5c5ed78168 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -247,7 +247,7 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? picard_metrics = DropseqMetrics.metric_output - File aligner_metrics = MergeStarOutputs.cell_reads_out + File? aligner_metrics = MergeStarOutputs.cell_reads_out # h5ad File h5ad_output_file = final_h5ad_output } From 9930db0f13e610463b128ba92235ee14cfa2d7c7 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 16:07:19 -0400 Subject: [PATCH 18/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index f826fd2292..43dec34778 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -461,10 +461,12 @@ task MergeStarOutput { $cell_reads=(~{cell_reads}) - if [ -f "$cell_reads" ]; then - declare -a cell_reads_files=(~{sep= ' ' cell_reads}) - cat ${cell_reads_files[@]} > ~{input_id}_cell_reads.txt - fi + for cell_read in "${cell_reads[@]}"; do + if [ -f "$cell_read" ]; then + cat "$cell_read" >> "~{input_id}_cell_reads.txt" + fi + done + # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /usr/gitc/create-merged-npz-output.py \ From 792607edb0d61c72620fccc74afce095a8b3e620 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 16:17:32 -0400 Subject: [PATCH 19/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 43dec34778..80ba68aa62 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -458,10 +458,9 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) + decalire -a cell_reads_files=(~{sep=' ' cell_reads}) - $cell_reads=(~{cell_reads}) - - for cell_read in "${cell_reads[@]}"; do + for cell_read in "${cell_reads_files[@]}"; do if [ -f "$cell_read" ]; then cat "$cell_read" >> "~{input_id}_cell_reads.txt" fi From 98af472ed68467e5dd2f654f50c1eaac995fb524 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 17 Jul 2023 16:22:44 -0400 Subject: [PATCH 20/24] Update StarAlign.wdl --- tasks/skylab/StarAlign.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 80ba68aa62..0033a5d983 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -458,7 +458,7 @@ task MergeStarOutput { declare -a barcodes_files=(~{sep=' ' barcodes}) declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) - decalire -a cell_reads_files=(~{sep=' ' cell_reads}) + declare -a cell_reads_files=(~{sep=' ' cell_reads}) for cell_read in "${cell_reads_files[@]}"; do if [ -f "$cell_read" ]; then From 7902a40eeacff10527d523d600a759375e52d6f2 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 18 Jul 2023 13:32:16 -0400 Subject: [PATCH 21/24] made changelog updates --- pipelines/skylab/multiome/Multiome.changelog.md | 5 +++++ pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.changelog.md | 5 +++++ pipelines/skylab/optimus/Optimus.wdl | 2 +- pipelines/skylab/slideseq/SlideSeq.changelog.md | 5 +++++ pipelines/skylab/slideseq/SlideSeq.wdl | 2 +- .../MultiSampleSmartSeq2SingleNucleus.changelog.md | 5 +++++ .../MultiSampleSmartSeq2SingleNucleus.wdl | 2 +- 8 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 8a6926bab4..2e2834326b 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 1.0.1 +2023-07-23 (Date of Last Commit) + +* Added STARsolo v2.7.10b metric outputs as an optional pipeline output and an output of the STARalign and MergeSTAR tasks + # 1.0.0 2023-06-22 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index dc1ad08d1f..210a1ea07d 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -4,7 +4,7 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus workflow Multiome { - String pipeline_version = "1.0.0" + String pipeline_version = "1.0.1" input { String input_id diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 216d14cfe1..28b8944e6e 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,4 +1,9 @@ +# 5.8.4 +2023-07-18 (Date of Last Commit) + +* Added STARsolo v2.7.10b metric outputs as an optional pipeline output and an output of the STARalign and MergeSTAR tasks + # 5.8.3 2023-06-23 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 5c5ed78168..a4a911fc40 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -67,7 +67,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "5.8.3" + String pipeline_version = "5.8.4" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 2252c8f607..524b926355 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 1.0.10 +2023-07-18 (Date of Last Commit) + +* Added STARsolo v2.7.10b metric outputs as an optional pipeline output and an output of the STARalign and MergeSTAR tasks. This does not impact the Slideseq pipeline + # 1.0.9 2023-06-14 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index ec631d8d2b..5328906c21 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -23,7 +23,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "1.0.9" + String pipeline_version = "1.0.10" input { Array[File] r1_fastq diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 61c9d639a2..3dc93d5534 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,8 @@ +# 1.2.25 +2023-07-18 (Date of Last Commit) + +* Added STARsolo v2.7.10b metric outputs as an optional pipeline output and an output of the STARalign and MergeSTAR tasks. This does not impact the snSS2 pipeline + # 1.2.24 2023-06-23 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index b595742c13..a2d85c804c 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { String? input_id_metadata_field } # Version of this pipeline - String pipeline_version = "1.2.24" + String pipeline_version = "1.2.25" if (false) { String? none = "None" From 34ffa95787f68f35d9fad16d37cb857f3152afee Mon Sep 17 00:00:00 2001 From: ekiernan <55763654+ekiernan@users.noreply.github.com> Date: Wed, 19 Jul 2023 10:29:08 -0400 Subject: [PATCH 22/24] deleted TAR of metrics --- tasks/skylab/StarAlign.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 0033a5d983..6744b9a17c 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -389,7 +389,6 @@ task STARsoloFastq { echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. fi mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam - #tar -zcvf ~{output_bam_basename}.star_metrics.tar *.stats *.txt *.csv >>> @@ -420,7 +419,6 @@ task STARsoloFastq { File align_features_sn_rna = "Features_sn_rna.stats" File summary_sn_rna = "Summary_sn_rna.csv" File umipercell_sn_rna = "UMIperCellSorted_sn_rna.txt" - #File aligner_metrics = "~{output_bam_basename}.star_metrics.tar" } } From e56449492302937bad9d8499557dd095d446e323 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 20 Jul 2023 07:54:16 -0400 Subject: [PATCH 23/24] Updated Optimus Readme to include aligner_metrics description in outputs --- website/docs/Pipelines/Optimus_Pipeline/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 44a2b4f9fd..7ae9008339 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -7,7 +7,7 @@ slug: /Pipelines/Optimus_Pipeline/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [optimus_v5.8.3](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | May, 2023 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | +| [optimus_v5.8.4](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | July, 2023 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![Optimus_diagram](Optimus_diagram.png) @@ -249,10 +249,11 @@ The following table lists the output files produced from the pipeline. For sampl | matrix | `_sparse_counts.npz` | Converted sparse matrix file from the MergeStarOutputs task. | NPZ | | matrix_row_index | `_sparse_counts_row_index.npy` | Index of cells in count matrix. | NPY | | matrix_col_index | `_sparse_counts_col_index.npy` | Index of genes in count matrix. | NPY | -| cell_metrics | `.cell-metrics.csv.gz` | Cell metrics | Compressed CSV | Matrix of metrics by cells. | -| gene_metrics | `.gene-metrics.csv.gz` | Gene metrics | Compressed CSV | Matrix of metrics by genes. | +| cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | +| gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | +| aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | | cell_calls | empty_drops_result.csv | emptyDrops results from the RunEmptyDrops task. | CSV | -| h5ad_output_file | `.h5ad` | h5ad | h5ad | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | N/A | +| h5ad_output_file | `.h5ad` | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | H5AD | The h5ad matrix is the default output. This matrix contains the unnormalized (unfiltered), UMI-corrected count matrices, as well as the gene and cell metrics detailed in the [Optimus Count Matrix Overview](./Loom_schema.md). From d3d7f5eee219d52ba4be16a88cc17c8d4d242b2b Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 21 Jul 2023 09:28:41 -0400 Subject: [PATCH 24/24] Added all aligner metrics to a final TAR --- pipelines/skylab/optimus/Optimus.wdl | 3 +++ tasks/skylab/StarAlign.wdl | 38 +++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index a4a911fc40..3b7f708b41 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -174,6 +174,9 @@ workflow Optimus { features = STARsoloFastq.features, matrix = STARsoloFastq.matrix, cell_reads = STARsoloFastq.cell_reads, + summary = STARsoloFastq.summary, + align_features = STARsoloFastq.align_features, + umipercell = STARsoloFastq.umipercell, input_id = input_id } if (counting_mode == "sc_rna"){ diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 6744b9a17c..9e5027a9c7 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -429,6 +429,9 @@ task MergeStarOutput { Array[File] features Array[File] matrix Array[File]? cell_reads + Array[File]? summary + Array[File]? align_features + Array[File]? umipercell String input_id @@ -457,13 +460,46 @@ task MergeStarOutput { declare -a features_files=(~{sep=' ' features}) declare -a matrix_files=(~{sep=' ' matrix}) declare -a cell_reads_files=(~{sep=' ' cell_reads}) + declare -a summary_files=(~{sep=' ' summary}) + declare -a align_features_files=(~{sep=' ' align_features}) + declare -a umipercell_files=(~{sep=' ' umipercell}) for cell_read in "${cell_reads_files[@]}"; do if [ -f "$cell_read" ]; then cat "$cell_read" >> "~{input_id}_cell_reads.txt" fi done + + for summary in "${summary_files[@]}"; do + if [ -f "$summary" ]; then + cat "$summary" >> "~{input_id}_summary.txt" + fi + done + + for align_feature in "${align_features_files[@]}"; do + if [ -f "$align_feature" ]; then + cat "$align_feature" >> "~{input_id}_align_features.txt" + fi + done + + for umipercell in "${umipercell_files[@]}"; do + if [ -f "$umipercell" ]; then + cat "$umipercell" >> "~{input_id}_umipercell.txt" + fi + done + for umipercell in "${umipercell_files[@]}"; do + if [ -f "$umipercell" ]; then + cat "$umipercell" >> "~{input_id}_umipercell.txt" + fi + done + + # If text files are present, create a tar archive with them + if ls *.txt 1> /dev/null 2>&1; then + tar -zcvf ~{input_id}.star_metrics.tar *.txt + else + echo "No text files found in the folder." + fi # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /usr/gitc/create-merged-npz-output.py \ @@ -486,7 +522,7 @@ task MergeStarOutput { File row_index = "~{input_id}_sparse_counts_row_index.npy" File col_index = "~{input_id}_sparse_counts_col_index.npy" File sparse_counts = "~{input_id}_sparse_counts.npz" - File? cell_reads_out = "~{input_id}_cell_reads.txt" + File? cell_reads_out = "~{input_id}.star_metrics.tar" } }