From 39f0e631fa7c4988367e18b464bfe299d4be57af Mon Sep 17 00:00:00 2001 From: "Kevin L. Keys" Date: Mon, 22 Jan 2024 02:12:00 -0800 Subject: [PATCH] Correct antigen analysis in cellranger multi (#4430) * fix incomplete specification of BEAM antigens/controls * update module tests for cellranger multi * bugfix in process selector * Remove environment.yml --------- Co-authored-by: Gregor Sturm Co-authored-by: Gregor Sturm --- .../nf-core/cellranger/multi/environment.yml | 5 - modules/nf-core/cellranger/multi/main.nf | 119 +++++++++--------- modules/nf-core/cellranger/multi/meta.yml | 74 ++++++++++- .../modules/nf-core/cellranger/multi/main.nf | 12 +- .../nf-core/cellranger/multi/nextflow.config | 2 +- .../modules/nf-core/cellranger/multi/test.yml | 22 ++-- 6 files changed, 153 insertions(+), 81 deletions(-) delete mode 100644 modules/nf-core/cellranger/multi/environment.yml diff --git a/modules/nf-core/cellranger/multi/environment.yml b/modules/nf-core/cellranger/multi/environment.yml deleted file mode 100644 index 487cdad2c194..000000000000 --- a/modules/nf-core/cellranger/multi/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: cellranger_multi -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/cellranger/multi/main.nf b/modules/nf-core/cellranger/multi/main.nf index 55a2f1918bd4..a6597d3d49f1 100644 --- a/modules/nf-core/cellranger/multi/main.nf +++ b/modules/nf-core/cellranger/multi/main.nf @@ -18,7 +18,8 @@ process CELLRANGER_MULTI { path vdj_reference , stageAs: "references/vdj/*" path vdj_primer_index , stageAs: "references/vdj/primers/*" path fb_reference , stageAs: "references/fb/*" - path beam_panel , stageAs: "references/beam/panel/*" + path beam_antigen_panel , stageAs: "references/beam/panel/antigens/*" + path beam_control_panel , stageAs: "references/beam/panel/controls/*" path cmo_reference , stageAs: "references/cmo/*" path cmo_barcodes , stageAs: "references/cmo/barcodes/*" path cmo_barcode_assignment, stageAs: "references/cmo/sample_barcode_assignment/*" @@ -45,20 +46,21 @@ process CELLRANGER_MULTI { // empty reference channels stage as "references" // empty FASTQ channels stage as "fastqs" // empty files stage as the file name, we check against 'EMPTY' - def gex_reference_name = gex_reference.getName() != 'references' ? gex_reference.getName() : '' - def gex_frna_probeset_name = gex_frna_probeset.getBaseName() != 'EMPTY' ? gex_frna_probeset.getName() : '' - def gex_targetpanel_name = gex_targetpanel.getBaseName() != 'EMPTY' ? gex_targetpanel.getName() : '' - def fb_reference_name = fb_reference.getBaseName() != 'EMPTY' ? fb_reference.getName() : '' - def vdj_reference_name = vdj_reference.getName() != 'references' ? vdj_reference.getName() : '' - def cmo_reference_name = cmo_reference.getName() != 'EMPTY' ? cmo_reference.getName() : '' - def cmo_sample_assignment = cmo_barcode_assignment.getName() != 'EMPTY' ? cmo_barcode_assignment.getName() : '' - - def include_gex = gex_fastqs.first().getName() != 'fastqs' && gex_reference ? '[gene-expression]' : '' - def include_vdj = vdj_fastqs.first().getName() != 'fastqs' && vdj_reference ? '[vdj]' : '' - def include_beam = beam_fastqs.first().getName() != 'fastqs' && beam_panel ? '[antigen-specificity]' : '' - def include_cmo = cmo_fastqs.first().getName() != 'fastqs' && cmo_barcodes ? '[samples]' : '' - def include_fb = fb_reference.first().getName() != 'references' ? '[feature]' : '' - def include_frna = gex_frna_probeset_name && frna_sampleinfo ? '[samples]' : '' + def gex_reference_name = gex_reference.getName() != 'references' ? gex_reference.getName() : '' + def gex_frna_probeset_name = gex_frna_probeset.getBaseName() != 'EMPTY' ? gex_frna_probeset.getName() : '' + def gex_targetpanel_name = gex_targetpanel.getBaseName() != 'EMPTY' ? gex_targetpanel.getName() : '' + def fb_reference_name = fb_reference.getBaseName() != 'EMPTY' ? fb_reference.getName() : '' + def vdj_reference_name = vdj_reference.getName() != 'references' ? vdj_reference.getName() : '' + def cmo_reference_name = cmo_reference.getName() != 'EMPTY' ? cmo_reference.getName() : '' + def cmo_sample_assignment = cmo_barcode_assignment.getName() != 'EMPTY' ? cmo_barcode_assignment.getName() : '' + def beam_antigen_panel_name = beam_antigen_panel.getName() != 'EMPTY' ? beam_antigen_panel.getName() : '' + + def include_gex = gex_fastqs.first().getName() != 'fastqs' && gex_reference ? '[gene-expression]' : '' + def include_vdj = vdj_fastqs.first().getName() != 'fastqs' && vdj_reference ? '[vdj]' : '' + def include_beam = beam_fastqs.first().getName() != 'fastqs' && beam_control_panel ? '[antigen-specificity]' : '' + def include_cmo = cmo_fastqs.first().getName() != 'fastqs' && cmo_barcodes ? '[samples]' : '' + def include_fb = fb_reference.first().getName() != 'references' ? '[feature]' : '' + def include_frna = gex_frna_probeset_name && frna_sampleinfo ? '[samples]' : '' def gex_reference_path = include_gex ? "reference,\$PWD/${gex_reference_name}" : '' def fb_reference_path = include_fb ? "reference,\$PWD/${fb_reference_name}" : '' @@ -73,11 +75,14 @@ process CELLRANGER_MULTI { // VDJ inner primer set def primer_index = vdj_primer_index.getBaseName() != 'EMPTY' ? "inner-enrichment-primers,\$PWD/references/primers/${vdj_primer_index.getName()}" : '' + // BEAM antigen list, remember that this is a Feature Barcode file + def beam_antigen_csv = include_beam && beam_antigen_panel_name != '' ? "reference,\$PWD/$beam_antigen_panel_name" : '' + // pull CSV text from these reference panels // these references get appended directly to config file - def beam_csv_text = include_beam && beam_panel.size() > 0 ? beam_panel.text : '' - def cmo_csv_text = include_cmo && cmo_barcodes.size() > 0 ? cmo_barcodes : '' - def frna_csv_text = include_frna && frna_sampleinfo.size() > 0 ? frna_sampleinfo.text : '' + def beam_csv_text = include_beam && beam_control_panel.size() > 0 ? beam_control_panel : '' + def cmo_csv_text = include_cmo && cmo_barcodes.size() > 0 ? cmo_barcodes : '' + def frna_csv_text = include_frna && frna_sampleinfo.size() > 0 ? frna_sampleinfo : '' // the feature barcodes section get options for either CRISPR or antibody capture assays def fb_options = meta_ab?.options ? meta_ab.options : (meta_crispr?.options ? meta_crispr.options : [] ) @@ -127,50 +132,52 @@ process CELLRANGER_MULTI { """ cat <<-CONFIG > $config - $include_gex - $gex_reference_path - $frna_probeset - $gex_options_filter_probes - $gex_options_r1_length - $gex_options_r2_length - $gex_options_chemistry - $gex_options_expect_cells - $gex_options_force_cells - $gex_options_no_secondary - $gex_options_no_bam - $gex_options_check_library_compatibility - $target_panel - $gex_options_no_target_umi_filter - $gex_options_include_introns - $cmo_options_min_assignment_confidence - $cmo_reference_path - $cmo_barcode_path - - $include_fb - $fb_reference_path - $fb_options_r1_length - $fb_options_r2_length - - $include_vdj - $vdj_reference_path - $primer_index - $vdj_options_r1_length - $vdj_options_r2_length - - [libraries] - fastq_id,fastqs,lanes,feature_types - $fastq_gex - $fastq_vdj - $fastq_antibody - $fastq_beam - $fastq_crispr - $fastq_cmo + $include_gex + $gex_reference_path + $frna_probeset + $gex_options_filter_probes + $gex_options_r1_length + $gex_options_r2_length + $gex_options_chemistry + $gex_options_expect_cells + $gex_options_force_cells + $gex_options_no_secondary + $gex_options_no_bam + $gex_options_check_library_compatibility + $target_panel + $gex_options_no_target_umi_filter + $gex_options_include_introns + $cmo_options_min_assignment_confidence + $cmo_reference_path + $cmo_barcode_path + + $include_fb + $fb_reference_path + $fb_options_r1_length + $fb_options_r2_length + + $include_vdj + $vdj_reference_path + $primer_index + $vdj_options_r1_length + $vdj_options_r2_length + + [libraries] + fastq_id,fastqs,lanes,feature_types + $fastq_gex + $fastq_vdj + $fastq_antibody + $fastq_beam + $fastq_crispr + $fastq_cmo CONFIG if [[ "$include_cmo" ]]; then echo "$include_cmo" >> $config; fi if [[ "$include_cmo" ]]; then cat $cmo_barcodes >> $config; fi if [[ "$include_beam" ]]; then echo "$include_beam" >> $config; fi if [[ "$include_beam" ]]; then cat "$beam_csv_text" >> $config; fi + if [[ "$include_beam" ]]; then echo "[feature]" >> $config; fi + if [[ "$include_beam" ]]; then echo "$beam_antigen_csv" >> $config; fi if [[ "$include_frna" ]]; then echo "$include_frna" >> $config; fi if [[ "$include_frna" ]]; then cat "$frna_csv_text" >> $config; fi diff --git a/modules/nf-core/cellranger/multi/meta.yml b/modules/nf-core/cellranger/multi/meta.yml index 79c8f7e57466..5ac3ebdbb87b 100644 --- a/modules/nf-core/cellranger/multi/meta.yml +++ b/modules/nf-core/cellranger/multi/meta.yml @@ -16,18 +16,86 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_cp tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_cp - licence: 10X Genomics EULA + licence: [10X Genomics EULA] input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - csv: + - gex_fastqs: type: file - description: Cell Ranger config file, in modified CSV format + description: FASTQ files for gene expression analysis + pattern: "*.fastq.gz" + - vdj_fastqs: + type: file + description: FASTQ files for V(D)J immunoprofiling analysis + pattern: "*.fastq.gz" + - ab_fastqs: + type: file + description: FASTQ files for antibody analysis + pattern: "*.fastq.gz" + - beam_fastqs: + type: file + description: FASTQ files for Barcode-Enabled Antigen Mapping (antigen analysis) + pattern: "*.fastq.gz" + - cmo_fastqs: + type: file + description: FASTQ files for cell multiplexing data + pattern: "*.fastq.gz" + - gex_reference: + type: directory + description: Folder containing Cellranger gene expression reference. Can also be a gzipped tarball + pattern: "*.tar.gz" + - gex_frna_probeset: + type: file + description: Fixed RNA profiling information containing custom probes in CSV format + pattern: "*.csv" + - gex_targetpanel: + type: file + description: Declaration of the target panel for Targeted Gene Expression analysis + pattern: "*.csv" + - vdj_reference: + type: directory + description: Folder containing Cellranger V(D)J reference. Can also be a gzipped tarball + pattern: "*.tar.gz" + - vdj_primer_index: + type: file + description: List of custom V(D)J inner enrichment primers + pattern: "*.csv" + - fb_reference: + type: file + description: The Feature Barcodes used for reference in Feature Barcoding Analysis + pattern: "*.csv" + - beam_antigen_panel: + type: file + description: The BEAM manifest in Feature Barcode CSV format + pattern: "*.csv" + - beam_control_panel: + type: file + description: The BEAM antigens set to control status, with corresponding MHC alleles, in Feature Barcode CSV format + pattern: "*.csv" + - cmo_reference: + type: file + description: Path to a custom Cell Multiplexing CSV reference IDs, or the `cmo-set` option in Cellranger + pattern: "*.csv" + - cmo_barcodes: + type: file + description: A CSV file appended to the Cellranger multi config linking samples to CMO IDs + pattern: "*.csv" + - cmo_barcode_assignment: + type: file + description: A CSV file that specifies the barcode-sample assignment in Cell Multiplexing analysis + pattern: "*.csv" + - frna_sampleinfo: + type: file + description: Sample information for fixed RNA analysis pattern: "*.csv" output: + - config: + type: file + description: The resolved Cellranger multi config used for analysis + pattern: "cellranger_multi_config.csv" - outs: type: file description: Files containing the outputs of Cell Ranger diff --git a/tests/modules/nf-core/cellranger/multi/main.nf b/tests/modules/nf-core/cellranger/multi/main.nf index 5d76ecdc2b54..26c6c02eebf5 100644 --- a/tests/modules/nf-core/cellranger/multi/main.nf +++ b/tests/modules/nf-core/cellranger/multi/main.nf @@ -161,7 +161,8 @@ ch_gex_frna_probeset = Channel.fromPath( empty_file ) ch_gex_targetpanel = Channel.fromPath( empty_file ) ch_vdj_primer_index = Channel.fromPath( empty_file ) ch_fb_reference = Channel.fromPath( empty_file ) -ch_beam_panel = Channel.fromPath( empty_file ) +ch_beam_antigen_panel = Channel.fromPath( empty_file ) +ch_beam_control_panel = Channel.fromPath( empty_file ) ch_cmo_reference = Channel.fromPath( empty_file ) ch_cmo_barcodes = Channel.fromPath( empty_file ) ch_cmo_sample_assignment = Channel.fromPath( empty_file ) @@ -207,7 +208,8 @@ workflow test_cellranger_multi_10k_pbmc { ch_vdj_reference, ch_vdj_primer_index, ch_ab_reference_10k_pbmc, - ch_beam_panel, + ch_beam_antigen_panel, + ch_beam_control_panel, ch_cmo_reference, ch_cmo_barcodes, ch_cmo_sample_assignment, @@ -257,7 +259,8 @@ workflow test_cellranger_multi_10k_pbmc_cmo { ch_vdj_ref_empty, ch_vdj_primer_index, ch_fb_reference, - ch_beam_panel, + ch_beam_antigen_panel, + ch_beam_control_panel, ch_cmo_reference_10k_pbmc_cmo, ch_cmo_barcodes_10k_pbmc_cmo, ch_cmo_sample_assignment, @@ -304,7 +307,8 @@ workflow test_cellranger_multi_5k_cmvpos_tcells { ch_vdj_reference, ch_vdj_primer_index, ch_fb_reference_5k_cmvpos_tcells, - ch_beam_panel, + ch_beam_antigen_panel, + ch_beam_control_panel, ch_cmo_reference, ch_cmo_barcodes, ch_cmo_sample_assignment, diff --git a/tests/modules/nf-core/cellranger/multi/nextflow.config b/tests/modules/nf-core/cellranger/multi/nextflow.config index 0525a45e28af..63d0c958e641 100644 --- a/tests/modules/nf-core/cellranger/multi/nextflow.config +++ b/tests/modules/nf-core/cellranger/multi/nextflow.config @@ -10,7 +10,7 @@ process { stageInMode = 'copy' } - withName: 'test_cellranger_multi_10k_5k_cmvpos_tcells:CELLRANGER_MULTI' { + withName: 'test_cellranger_multi_5k_cmvpos_tcells:CELLRANGER_MULTI' { stageInMode = 'copy' } } diff --git a/tests/modules/nf-core/cellranger/multi/test.yml b/tests/modules/nf-core/cellranger/multi/test.yml index e553e0f211ff..a9796c66b91a 100644 --- a/tests/modules/nf-core/cellranger/multi/test.yml +++ b/tests/modules/nf-core/cellranger/multi/test.yml @@ -8,15 +8,14 @@ contains: - "[gene-expression]" - path: output/cellranger/genome.filtered.gtf - md5sum: 428eb76f23f848bbb727fa5001a4d0fb - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa md5sum: f315020d899597c1b57e5fe9f60f4c3e - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai md5sum: 3520cd30e1b100e55f578db9c855f685 - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz - md5sum: 6d9b5f409bfea95022bc25b9590e194e - path: output/cellranger/homo_sapiens_chr22_reference/reference.json - md5sum: 55de930f10bc6b116c3f0c0fe0cbf4fc + contains: + - "homo_sapiens_chr22_reference" - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome md5sum: 22102926fadf5890e905ca71b2da3f35 - path: output/cellranger/homo_sapiens_chr22_reference/star/SA @@ -32,7 +31,6 @@ - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt md5sum: bc73df776dd3d5bb9cfcbcba60880519 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab - md5sum: d04497f69d6ef889efd4d34fe63edcc4 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab md5sum: 0d560290fab688b7268d88d5494bf9fe - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab @@ -123,15 +121,16 @@ contains: - "[gene-expression]" - path: output/cellranger/genome.filtered.gtf - md5sum: 428eb76f23f848bbb727fa5001a4d0fb + contains: + - "chr22" - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa md5sum: f315020d899597c1b57e5fe9f60f4c3e - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai md5sum: 3520cd30e1b100e55f578db9c855f685 - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz - md5sum: 6d9b5f409bfea95022bc25b9590e194e - path: output/cellranger/homo_sapiens_chr22_reference/reference.json - md5sum: 55de930f10bc6b116c3f0c0fe0cbf4fc + contains: + - "homo_sapiens_chr22_reference" - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome md5sum: 22102926fadf5890e905ca71b2da3f35 - path: output/cellranger/homo_sapiens_chr22_reference/star/SA @@ -147,7 +146,6 @@ - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt md5sum: bc73df776dd3d5bb9cfcbcba60880519 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab - md5sum: d04497f69d6ef889efd4d34fe63edcc4 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab md5sum: 0d560290fab688b7268d88d5494bf9fe - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab @@ -225,15 +223,16 @@ contains: - "[gene-expression]" - path: output/cellranger/genome.filtered.gtf - md5sum: 428eb76f23f848bbb727fa5001a4d0fb + contains: + - "chr22" - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa md5sum: f315020d899597c1b57e5fe9f60f4c3e - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai md5sum: 3520cd30e1b100e55f578db9c855f685 - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz - md5sum: 6d9b5f409bfea95022bc25b9590e194e - path: output/cellranger/homo_sapiens_chr22_reference/reference.json - md5sum: 55de930f10bc6b116c3f0c0fe0cbf4fc + contains: + - "homo_sapiens_chr22_reference" - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome md5sum: 22102926fadf5890e905ca71b2da3f35 - path: output/cellranger/homo_sapiens_chr22_reference/star/SA @@ -249,7 +248,6 @@ - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt md5sum: bc73df776dd3d5bb9cfcbcba60880519 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab - md5sum: d04497f69d6ef889efd4d34fe63edcc4 - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab md5sum: 0d560290fab688b7268d88d5494bf9fe - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab