diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34b310a9..82f7ebfd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,9 @@ jobs: NXF_VER: - "23.04.0" - "latest-everything" + TEST_PROFILE: + - "test" + - "test_sim" steps: - name: Check out pipeline code uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 @@ -40,12 +43,8 @@ jobs: uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile "${{ matrix.TEST_PROFILE }}",docker --outdir ./results diff --git a/CHANGELOG.md b/CHANGELOG.md index b96488af..cb015936 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,18 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co ### `Changed` -- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository -- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files +- [#18](https://github.com/nf-core/phaseimpute/pull/18) + - Maps and region by chromosome + - update tests config files + - correct meta map propagation + - Test impute and test sim works - [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) ### `Fixed` +- 
[#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository +- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files + ### `Dependencies` ### `Deprecated` diff --git a/assets/panel.csv b/assets/panel.csv index c99b845c..7286169e 100644 --- a/assets/panel.csv +++ b/assets/panel.csv @@ -1,3 +1,3 @@ -panel,vcf,index,sites,tsv,legend,phased -1000GP,1000GP.phased.vcf,1000GP.phased.vcf.csi,1000GP.sites,1000GP.tsv,,TRUE -1000GP_RePhase,1000GP.vcf,1000GP.vcf.csi,,,,FALSE +panel,chr,vcf,index +1000GP,chr21,1000GP_21.vcf,1000GP_21.vcf.csi +1000GP,chr22,1000GP_22.vcf,1000GP_22.vcf.csi diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json index 46ce0f7c..c1107f5e 100644 --- a/assets/schema_input_panel.json +++ b/assets/schema_input_panel.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input.json", - "title": "nf-core/phaseimpute pipeline - params.input_region schema", - "description": "Schema for the file provided with params.input_region", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input_panel.json", + "title": "nf-core/phaseimpute pipeline - params.panel schema", + "description": "Schema for the file provided with params.panel", "type": "array", "items": { "type": "object", @@ -10,39 +10,26 @@ "panel": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Panel name must be provided and cannot contain spaces", - "meta": ["panel"] + "errorMessage": "Panel name must be provided as a string and cannot contain spaces", + "meta": ["id"] }, - "vcf": { - "type": "string", - "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?$", - "errorMessage": "Panel vcf file must be provided, cannot contain spaces and must have extension '.vcf'" - }, - "index": { - "type": "string", - "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", - 
"errorMessage": "Panel vcf index file must be provided, cannot contain spaces and must have extension '.vcf.tbi' or '.vcf.csi'" - }, - "sites": { + "chr": { "type": "string", - "pattern": "^\\S+\\.sites(\\.bcf)?$", - "errorMessage": "Panel sites file must be provided, cannot contain spaces and must have extension '.sites'" + "pattern": "^\\S+$", + "errorMessage": "Chromosome must be provided as a string and cannot contain spaces", + "meta": ["chr"] }, - "tsv": { + "vcf": { "type": "string", - "pattern": "^\\S+\\.tsv(\\.gz)?$", - "errorMessage": "Panel tsv file must be provided, cannot contain spaces and must have extension '.tsv'" + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?$", + "errorMessage": "Panel file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension" }, - "legend": { + "index": { "type": "string", - "pattern": "^\\S+\\.legend$", - "errorMessage": "Panel legend file must be provided, cannot contain spaces and must have extension '.legend'" - }, - "phased": { - "type": "boolean", - "errorMessage": "Is the vcf given phased?
Must be a boolean" + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", + "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension, followed by '.csi' or '.tbi' extension" } }, - "required": ["panel", "vcf", "index", "phased"] + "required": ["panel", "chr", "vcf", "index"] } } diff --git a/assets/schema_input_region.json b/assets/schema_input_region.json index 6c871b1a..5592aea9 100644 --- a/assets/schema_input_region.json +++ b/assets/schema_input_region.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input.json", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input_region.json", "title": "nf-core/phaseimpute pipeline - params.input_region schema", "description": "Schema for the file provided with params.input_region", "type": "array", @@ -8,16 +8,9 @@ "type": "object", "properties": { "chr": { - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+$" - }, - { - "type": "integer", - "pattern": "^\\d+$" - } - ] + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome name must be provided as a string and cannot contain spaces" }, "start": { "type": "integer", diff --git a/assets/schema_map.json b/assets/schema_map.json new file mode 100644 index 00000000..4b981006 --- /dev/null +++ b/assets/schema_map.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_map.json", + "title": "nf-core/phaseimpute pipeline - params.map schema", + "description": "Schema for the file provided with params.map", + "type": "array", + "items": { + "type": "object", + "properties": { + "chr": { + "type": "string", + "pattern": "^(chr)?[0-9]+$", + "errorMessage": "Chromosome must be provided and must be a string containing only numbers, with or without the prefix 'chr'", +
"meta": ["chr"] + }, + "map": { + "type": "string", + "pattern": "^\\S+\\.(g)?map(\\.gz)?$", + "errorMessage": "Map file must be provided, cannot contain spaces and must have extension '.map' or '.gmap' with optional 'gz' extension" + } + }, + "required": ["chr", "map"] + } +} diff --git a/conf/modules.config b/conf/modules.config index 0d09feb2..a04bf589 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,13 +35,13 @@ process { ] } - // Simulate workflow - withName: VIEW_REGION { + // Simulation workflow + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:SAMTOOLS_VIEW' { ext.args = [ ].join(' ') ext.prefix = { "${meta.id}_R${meta.region}" } } - withName: VIEW_DEPTH { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { ext.args = [ ].join(' ') ext.prefix = { "${meta.id}_D${meta.depth}" } diff --git a/conf/test.config b/conf/test.config index 1f046073..525d6fe0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,16 +16,17 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_memory = '2.GB' + max_time = '1.h' // Input data - input = "${projectDir}/tests/csv/bam.csv" + input = "${projectDir}/tests/csv/sample_bam.csv" + input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "${projectDir}/tests/csv/panel.csv" - phased = true + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + panel = "${projectDir}/tests/csv/panel.csv" + phased = true // Impute parameters step = "impute" diff --git a/conf/test_full.config b/conf/test_full.config index 1d7f521f..c2e654eb 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,14 +14,10 @@ params { config_profile_name = 'Full test profile' config_profile_description = 
'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed // Genome references - map = "/groups/dog/llenezet/test-datasets/data/genetic_maps.b38/chr21.b38.gmap.gz" + map = "https://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/plink.GRCh38.map.zip" genome = "GRCh38" - fasta = "/groups/dog/llenezet/script/phaseimpute/data/genome.fa" // Resources increase incompatible with Github Action max_cpus = 12 @@ -29,8 +25,8 @@ params { max_time = '6.h' // Input data - input = "tests/csv/sample_sim.csv" - panel = "tests/csv/panel.csv" + input = "${projectDir}/tests/csv/sample_sim_full.csv" + panel = "${projectDir}/tests/csv/panel_full.csv" input_region_string = "all" step = "simulate" } diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config deleted file mode 100644 index 2c5fcc0b..00000000 --- a/conf/test_panelprep.config +++ /dev/null @@ -1,32 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/phaseimpute -profile test_panelprep, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test Panel preparation mode' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = "${projectDir}/tests/csv/panel.csv" - input_region = "${projectDir}/tests/csv/regionsheet.csv" - genome = "GRCh38" - - map = "/groups/dog/llenezet/test-datasets/data/genetic_maps.b38/chr21.b38.gmap.gz" - fasta = "/groups/dog/llenezet/test-datasets/data/reference_genome/hs38DH.chr21.fa" - - step = "panelprep" - tools = ["glimpse2", "glimpse1"] -} diff --git a/conf/test_sim.config b/conf/test_sim.config index a773b4b8..8c2bd1f5 100644 --- a/conf/test_sim.config +++ b/conf/test_sim.config @@ -20,13 +20,12 @@ params { max_time = '6.h' // Input data - input = "tests/csv/sample_sim.csv" - input_region_file = "tests/csv/regionsheet.csv" - depth = [1, 2] - genome = "GRCh38" + input = "${projectDir}/tests/csv/sample_sim.csv" + input_region = "${projectDir}/tests/csv/region.csv" + depth = 1 - map = "/groups/dog/llenezet/test-datasets/data/genetic_maps.b38/chr21.b38.gmap.gz" - fasta = "/groups/dog/llenezet/test-datasets/data/reference_genome/hs38DH.chr21.fa" + map = "${projectDir}/tests/csv/map.csv" + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" step = "simulate" } diff --git a/docs/development.md b/docs/development.md index 2dcc411d..8126332d 100644 --- a/docs/development.md +++ b/docs/development.md @@ -1,18 +1,21 @@ # Development -To contribute to this pipeline you will need to install the development environment: -This is possible only on linux or MacOs machine as Nextflow only work on these platform. 
- -```bash -conda env create -f environment.yml -conda activate nf-core-phaseimpute-1.0dev -``` - -## Add new module - -```bash -nf-core modules install -``` +## Features and tasks + +- [ ] Add automatic detection of chromosome name to create a renaming file for the vcf +- [ ] Make the different test workflows work + - [ ] Simulation + - [ ] Validation + - [ ] Preprocessing + - [x] Imputation + - [ ] Validation + - [ ] Postprocessing +- [ ] Add support for `anyOf()` or `oneOf()` in the nf-core schema for the map, panel and region files +- [ ] Add nf-test for all modules and subworkflows +- [ ] Remove all TODOs +- [ ] Check if panel is necessary depending on the tool selected +- [ ] Set modules configuration as full path workflow:subworkflow:module +- [ ] Where should the map file go (separate csv or in panel csv) ## Run tests @@ -36,6 +39,7 @@ All channel need to be identified by a meta map as follow: - M : map used - T : tool used - G : reference genome used (is it needed ?) +- D : depth ## Open questions diff --git a/main.nf b/main.nf index 9bac5ebd..597a6f46 100644 --- a/main.nf +++ b/main.nf @@ -36,7 +36,8 @@ workflow NFCORE_PHASEIMPUTE { ch_input // channel: samplesheet read in from --input ch_fasta // channel: reference genome FASTA file with index ch_panel // channel: reference panel variants file - ch_regions // channel: regions to use [meta, region] + ch_regions // channel: regions to use [[chr, region], region] + ch_depth // channel: depth of coverage file [[depth], depth] ch_map // channel: map file for imputation ch_versions // channel: versions of software used @@ -49,6 +50,7 @@ workflow NFCORE_PHASEIMPUTE { ch_fasta, ch_panel, ch_regions, + ch_depth, ch_map, ch_versions ) @@ -89,6 +91,7 @@ workflow { PIPELINE_INITIALISATION.out.fasta, PIPELINE_INITIALISATION.out.panel, PIPELINE_INITIALISATION.out.regions, + PIPELINE_INITIALISATION.out.depth, PIPELINE_INITIALISATION.out.map, PIPELINE_INITIALISATION.out.versions ) diff --git
a/modules/nf-core/samtools/coverage/main.nf b/modules/nf-core/samtools/coverage/main.nf index da7b3233..52f3225c 100644 --- a/modules/nf-core/samtools/coverage/main.nf +++ b/modules/nf-core/samtools/coverage/main.nf @@ -9,8 +9,7 @@ process SAMTOOLS_COVERAGE { input: tuple val(meta), path(input), path(input_index), val(region) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) + tuple val(meta2), path(fasta), path(fai) output: tuple val(meta), path("*.txt"), emit: coverage diff --git a/nextflow.config b/nextflow.config index 80adc22f..882f501c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { // Input options input = null - input_region = "all" + input_region = null map = null tools = null @@ -191,6 +191,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_sim { includeConfig 'conf/test_sim.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 4c9a5c72..4a700bcb 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -1,83 +1,65 @@ -include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage/main.nf' -include { SAMTOOLS_INDEX as INDEX } from '../../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_VIEW as VIEW_REGION } from '../../../modules/nf-core/samtools/view/main.nf' -include { SAMTOOLS_VIEW as VIEW_DEPTH } from '../../../modules/nf-core/samtools/view/main.nf' +include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage/main.nf' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main.nf' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view/main.nf' workflow BAM_DOWNSAMPLE { take: - ch_bam // channel: [ [id, ref], bam, bai ] - ch_depth // channel: [ val(depth) ] - 
ch_fasta // channel: [ fasta ] + ch_bam // channel: [ [id, genome, chr, region], bam, bai ] + ch_depth // channel: [ [depth], depth ] + ch_fasta // channel: [ [genome], fasta, fai ] main: ch_versions = Channel.empty() - // Add fasta and region to bam channel - ch_input_region = ch_bam - .combine(ch_fasta) - .combine(ch_region) - .map{ metaI, bam, index, fasta, metaR, region -> - [ metaI + metaR, bam, index, fasta, region ] - } - .combine(Channel.of([[]])) // depth parameter - - // Extract region of interest - VIEW_REGION(ch_input_region, []) - ch_versions = ch_versions.mix(VIEW_REGION.out.versions.first()) - - // Index region of interest - INDEX1 (VIEW_REGION.out.bam) - ch_versions = ch_versions.mix(INDEX1.out.versions.first()) - // Add region to channel ch_coverage = ch_bam - .map{ metaIR, bam, index -> - [ metaIR, bam, index, metaIR["region"] ] + .map{ metaICR, bam, index -> + [ metaICR, bam, index, metaICR["region"] ] } // Get coverage of the region - SAMTOOLS_COVERAGE ( ch_coverage ) // meta, bam, bai, region + SAMTOOLS_COVERAGE ( ch_coverage, ch_fasta ) // [ meta, bam, bai, region], [ meta, fasta, fai ] ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE.out.versions.first()) // Compute mean depth of the region ch_mean_depth = SAMTOOLS_COVERAGE.out.coverage .splitCsv(header: true, sep:'\t') - .map{ metaIR, row -> - [ metaIR,"${row.meandepth}" as Float ] + .map{ metaICR, row -> + [ metaICR,"${row.meandepth}" as Float ] } // Compute downsampling factor ch_depth_factor = ch_mean_depth .combine(ch_depth) - .map{ metaIR, mean, depth -> - [ metaIR, metaIR + ["depth":depth], depth as Float / mean ] + .map{ metaICR, mean, metaD, depth -> + [ metaICR, metaICR + metaD, depth as Float / mean ] } // Add all necessary channel for downsampling ch_input_downsample = ch_coverage - .combine(ch_fasta) - .combine(ch_depth_factor) - .map{ metaIR, bam, index, region, fasta, metaIRD, depth -> - [ metaIRD, bam, index, fasta, region, depth ] + .combine(ch_depth_factor, by : 0) + .map{ 
metaICR, bam, index, region, metaICRD, depth -> + [ metaICRD, bam, index, region, depth ] } // Downsample - VIEW_DEPTH(ch_input_downsample, []) - ch_versions = ch_versions.mix(VIEW_DEPTH.out.versions.first()) + SAMTOOLS_VIEW( + ch_input_downsample, + ch_fasta.map{ metaG, fasta, fai -> [metaG, fasta] }, + [] + ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) // Index result - INDEX2(VIEW_DEPTH.out.bam) - ch_versions = ch_versions.mix(INDEX2.out.versions.first()) + SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) // Aggregate bam and index - ch_bam_region = VIEW_REGION.out.bam - .combine(INDEX1.out.bai) - ch_bam_emul = VIEW_DEPTH.out.bam - .combine(INDEX2.out.bai) + ch_bam_emul = SAMTOOLS_VIEW.out.bam + .combine(SAMTOOLS_INDEX.out.bai, by:0) emit: - bam_region = ch_bam_region // channel: [ metaIR, bam, bai ] - bam_emul = ch_bam_emul // channel: [ metaIRD, bam, bai ] + bam_emul = ch_bam_emul // channel: [ [id, genome, chr, region, depth], bam, bai ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_region/main.nf b/subworkflows/local/bam_region/main.nf index 9cfa1787..1968cd38 100644 --- a/subworkflows/local/bam_region/main.nf +++ b/subworkflows/local/bam_region/main.nf @@ -1,37 +1,39 @@ -include { SAMTOOLS_INDEX as INDEX1 } from '../../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_VIEW as VIEW_REGION } from '../../../modules/nf-core/samtools/view/main.nf' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main.nf' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view/main.nf' workflow BAM_REGION { take: - ch_bam // channel: [ [id, ref], bam, bai ] - ch_region // channel: [ [ref, region], val(chr:start-end) ] - ch_fasta // channel: [ fasta ] + ch_bam // channel: [ [id], bam, bai ] + ch_region // channel: [ [chr, region], val(chr:start-end) ] + ch_fasta // channel: [ [genome], fasta, fai ] main: 
ch_versions = Channel.empty() // Add fasta and region to bam channel ch_input_region = ch_bam - .combine(ch_fasta) .combine(ch_region) - .map{ meta, bam, index, fasta, metaR, region -> - [meta + metaR, bam, index, fasta, region] + .map{ metaI, bam, index, metaCR, region -> + [ metaI + metaCR, bam, index, region, [] ] } - .combine(Channel.of([[]])) // depth parameter // Extract region of interest - VIEW_REGION(ch_input_region, []) - ch_versions = ch_versions.mix(VIEW_REGION.out.versions.first()) + SAMTOOLS_VIEW( + ch_input_region, + ch_fasta.map{ metaG, fasta, fai -> [metaG, fasta] }, + [] + ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) // Index region of interest - INDEX1 (VIEW_REGION.out.bam) - ch_versions = ch_versions.mix(INDEX1.out.versions.first()) + SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_bam_region = VIEW_REGION.bam - .combine(INDEX1.out.bai, by: 0) + ch_bam_region = SAMTOOLS_VIEW.out.bam + .combine(SAMTOOLS_INDEX.out.bai, by: 0) emit: - bam_region = ch_bam_region // channel: [ metaIR, bam, index ] + bam_region = ch_bam_region // channel: [ metaIGCR, bam, index ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index 277933d1..3e552f64 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -6,18 +6,18 @@ workflow COMPUTE_GL { take: ch_input // channel: [ [id, ref], bam, bai ] - ch_target // channel: [ [panel], sites, tsv] + ch_target // channel: [ [panel, chr], sites, tsv] ch_fasta // channel: [ [ref], fasta, fai] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_mpileup = ch_input + ch_mpileup = ch_input .combine(ch_target) - .map{metaI, bam, bai, metaP, sites, tsv -> - [metaI + metaP, bam, sites, tsv]} + .map{metaI, bam, bai, metaPC, sites, tsv -> + [metaI + metaPC, 
bam, sites, tsv]} BCFTOOLS_MPILEUP( ch_mpileup, diff --git a/subworkflows/local/get_panel/main.nf b/subworkflows/local/get_panel/main.nf index 4539647b..67904059 100644 --- a/subworkflows/local/get_panel/main.nf +++ b/subworkflows/local/get_panel/main.nf @@ -13,7 +13,7 @@ include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/n workflow GET_PANEL { take: - ch_vcf // channel: [ [id], vcf, index ] + ch_vcf // channel: [ [id, chr], vcf, index ] ch_fasta // channel: [ [genome], fasta, fai ] main: @@ -80,8 +80,8 @@ workflow GET_PANEL { .combine(ch_panel_sites, by: 0) .combine(ch_panel_tsv, by: 0) .combine(ch_panel_phased, by: 0) - .map{ metaI, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[panel:metaI.id], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + .map{ metaIC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [[panel:metaIC.id, chr:metaIC.chr ], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] } emit: diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 164b78d5..4ce4b840 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -103,14 +103,7 @@ workflow PIPELINE_INITIALISATION { fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } } } - ch_ref_gen = ch_fasta.combine(fai) - - // - // Create map channel - // - ch_map = params.map ? 
- Channel.of([["map": params.map], params.map]) : - Channel.of([[],[]]) + ch_ref_gen = ch_fasta.combine(fai).collect() // // Create channel from input file provided through params.input @@ -126,38 +119,67 @@ workflow PIPELINE_INITIALISATION { // Create channel for panel // if (params.panel) { - ch_panel = Channel - .fromSamplesheet("panel") - .map { - meta,vcf,index,sites,tsv,legend,phased -> - [ meta, vcf, index ] - } + if (params.panel.endsWith(".csv")) { + println("Panel file provided as input is a samplesheet") + ch_panel = Channel.fromSamplesheet("panel") + } else { + // #TODO Wait for `oneOf()` to be supported in the nextflow_schema.json + error "Panel file provided is of another format than CSV (not yet supported). Please separate your panel by chromosome and use the samplesheet format." + } + } else { + // #TODO check if panel is required + ch_panel = Channel.of([[],[],[]]) } // // Create channel from region input // - if (params.input_region) { - if (params.input_region.endsWith(".csv")) { - println "Region file provided as input is a csv file" - ch_regions = Channel.fromSamplesheet("input_region") - .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} - .map{ metaC, region -> [metaC + ["region": region], region]} + if (params.input_region?.endsWith(".csv")) { + println "Region file provided as input is a csv file" + ch_regions = Channel.fromSamplesheet("input_region") + .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} + .map{ metaC, region -> [metaC + ["region": region], region]} + } else { + error "Region file is missing or of another format than CSV (not yet supported). Please separate your reference genome by chromosome and use the samplesheet format."
+ /* #TODO Wait for `oneOf()` to be supported in the nextflow_schema.json + GET_REGION ( + params.input_region, + ch_ref_gen + ) + ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) + ch_regions = GET_REGION.out.regions + */ + } + + // + // Create map channel + // + if (params.map) { + if (params.map.endsWith(".csv")) { + println("Map file provided as input is a samplesheet") + ch_map = Channel.fromSamplesheet("map") } else { - println "Region file provided is a single region" - GET_REGION ( - params.input_region, - ch_ref_gen - ) - ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) - ch_regions = GET_REGION.out.regions + error "Map file provided is of another format than CSV (not yet supported). Please separate your genetic map by chromosome and use the samplesheet format." } + } else { + ch_map = ch_regions + .map{ metaCR, regions -> [metaCR.subMap("chr"), []] } + } + + // + // Create depth channel + // + if (params.depth) { + ch_depth = Channel.of([[depth: params.depth], params.depth]) + } else { + ch_depth = Channel.of([[],[]]) } emit: input = ch_input // [ [meta], bam, bai ] fasta = ch_ref_gen // [ [genome], fasta, fai ] - panel = ch_panel // [ [panel], panel ] + panel = ch_panel // [ [panel, chr], vcf, index ] + depth = ch_depth // [ [depth], depth ] regions = ch_regions // [ [chr, region], region ] map = ch_map // [ [chr], map ] versions = ch_versions @@ -218,7 +240,9 @@ def validateInputParameters() { assert params.step, "A step must be provided" // Check that at least one tool is provided - assert params.tools, "No tools provided" + if (params.step == "impute" || params.step == "panelprep" || params.step == "panel_prep") { + assert params.tools, "No tools provided" + } } // diff --git a/tests/csv/bam.csv b/tests/csv/bam.csv deleted file mode 100644 index 78269414..00000000 --- a/tests/csv/bam.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample,bam,bai
-NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.1x.bam.bai -NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.1x.bam.bai -NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA20359/NA20359.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA20359/NA20359.s.1x.bam.bai diff --git a/tests/csv/map.csv b/tests/csv/map.csv new file mode 100644 index 00000000..bf33ce2c --- /dev/null +++ b/tests/csv/map.csv @@ -0,0 +1,3 @@ +chr,map +chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21/GRCh38_chr21.s.map +chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/22/GRCh38_chr22.s.map diff --git a/tests/csv/panel.csv b/tests/csv/panel.csv index 91f70d1d..8a5c58b1 100644 --- a/tests/csv/panel.csv +++ b/tests/csv/panel.csv @@ -1,2 +1,3 @@ -panel,vcf,index,sites,tsv,legend,phased -1000GP.s.norel,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf.csi,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.sites.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.tsv.gz,,TRUE +panel,chr,vcf,index 
+1000GP.s.norel,chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.bcf.csi +1000GP.s.norel,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.bcf.csi diff --git a/tests/csv/panel_full.csv b/tests/csv/panel_full.csv new file mode 100644 index 00000000..c86f442b --- /dev/null +++ b/tests/csv/panel_full.csv @@ -0,0 +1,23 @@ +panel,chr,vcf,index +1000G_phased,chr1,s3://1000genomes/release/20130502/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr2,s3://1000genomes/release/20130502/ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr3,s3://1000genomes/release/20130502/ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr4,s3://1000genomes/release/20130502/ALL.chr4.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr4.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr5,s3://1000genomes/release/20130502/ALL.chr5.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr5.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi 
+1000G_phased,chr6,s3://1000genomes/release/20130502/ALL.chr6.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr6.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr7,s3://1000genomes/release/20130502/ALL.chr7.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr7.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr8,s3://1000genomes/release/20130502/ALL.chr8.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr8.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr9,s3://1000genomes/release/20130502/ALL.chr9.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr9.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr10,s3://1000genomes/release/20130502/ALL.chr10.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr10.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr11,s3://1000genomes/release/20130502/ALL.chr11.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr11.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr12,s3://1000genomes/release/20130502/ALL.chr12.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr12.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr13,s3://1000genomes/release/20130502/ALL.chr13.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr13.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi 
+1000G_phased,chr14,s3://1000genomes/release/20130502/ALL.chr14.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr14.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr15,s3://1000genomes/release/20130502/ALL.chr15.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr15.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr16,s3://1000genomes/release/20130502/ALL.chr16.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr16.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr17,s3://1000genomes/release/20130502/ALL.chr17.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr17.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr18,s3://1000genomes/release/20130502/ALL.chr18.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr18.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr19,s3://1000genomes/release/20130502/ALL.chr19.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr19.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr20,s3://1000genomes/release/20130502/ALL.chr20.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr20.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G_phased,chr21,s3://1000genomes/release/20130502/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr21.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi 
+1000G_phased,chr22,s3://1000genomes/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,s3://1000genomes/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi diff --git a/tests/csv/regionsheet.csv b/tests/csv/region.csv similarity index 100% rename from tests/csv/regionsheet.csv rename to tests/csv/region.csv diff --git a/tests/csv/sample_sim.csv b/tests/csv/sample_sim.csv new file mode 100644 index 00000000..7e614856 --- /dev/null +++ b/tests/csv/sample_sim.csv @@ -0,0 +1,4 @@ +sample,bam,bai +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.bam.bai +NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.bam.bai +NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA20359/NA20359.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA20359/NA20359.s.bam.bai diff --git a/tests/csv/sample_sim_full.csv b/tests/csv/sample_sim_full.csv new file mode 100644 index 00000000..c334c666 --- /dev/null +++ b/tests/csv/sample_sim_full.csv @@ -0,0 +1,2 @@ +sample,bam,bai +#TODO find bam not in 1000G panel diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 5a336ae1..1cde520b 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -13,12 +13,11 @@ include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_n include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' -include { BAM_REGION } from 
'../../subworkflows/local/bam_region' - // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { BAM_REGION } from '../../subworkflows/local/bam_region' include { BAM_DOWNSAMPLE } from '../../subworkflows/local/bam_downsample' include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' include { COMPUTE_GL as GL_INPUT } from '../../subworkflows/local/compute_gl' @@ -35,11 +34,12 @@ include { GET_PANEL } from '../../subworkflows/local/get_panel workflow PHASEIMPUTE { take: - ch_input // channel: samplesheet read in from --input + ch_input // channel: input file [ [id, chr], bam, bai ] ch_fasta // channel: fasta file [ [genome], fasta, fai ] - ch_panel // channel: panel file [ [id], vcf, index ] - ch_region // channel: region to use [meta, region] - ch_map // channel: genetic map + ch_panel // channel: panel file [ [id, chr], chr, vcf, index ] + ch_region // channel: region to use [ [chr, region], region] + ch_depth // channel: depth to downsample to [ [depth], depth ] + ch_map // channel: genetic map [ [chr], map] ch_versions // channel: versions of software used main: @@ -50,36 +50,29 @@ workflow PHASEIMPUTE { // Simulate data if asked // if (params.step == 'simulate') { - // - // Read in samplesheet, validate and stage input_simulate files - // - ch_sim_input = Channel.fromSamplesheet("input") - // Output channel of simulate process ch_sim_output = Channel.empty() // Split the bam into the region specified - ch_bam_region = BAM_REGION(ch_input_sim, ch_region, fasta) + BAM_REGION(ch_input, ch_region, ch_fasta) // Initialize channel to impute ch_bam_to_impute = Channel.empty() if (params.depth) { - // Create channel from depth parameter - ch_depth = Channel.fromList(params.depth) - // Downsample input to desired depth - BAM_DOWNSAMPLE(ch_sim_input, ch_region, ch_depth, ch_fasta) + BAM_DOWNSAMPLE( + BAM_REGION.out.bam_region, + ch_depth, + ch_fasta + ) ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions.first()) - 
ch_sim_output = ch_sim_output.mix(BAM_DOWNSAMPLE.out.bam_emul) + ch_input = ch_input.mix(BAM_DOWNSAMPLE.out.bam_emul) } if (params.genotype) { - // Create channel from samplesheet giving the chips snp position - ch_chip_snp = Channel.fromSamplesheet("input_chip_snp") - BAM_TO_GENOTYPE(ch_sim_input, ch_region, ch_chip_snp, ch_fasta) - ch_sim_output = ch_sim_output.mix(BAM_TO_GENOTYPE.out.bam_emul) + error "Genotype simulation not yet implemented" } } @@ -94,7 +87,10 @@ workflow PHASEIMPUTE { ch_panel = VCF_CHR_RENAME.out.vcf_rename } - GET_PANEL(ch_panel, ch_fasta) + if (ch_panel.map{it[3] == null}.any()) { + print("Need to compute the sites and tsv files for the panel") + GET_PANEL(ch_panel, ch_fasta) + } ch_versions = ch_versions.mix(GET_PANEL.out.versions.first()) @@ -105,12 +101,12 @@ workflow PHASEIMPUTE { if (params.tools.contains("glimpse1")) { println "Impute with Glimpse1" ch_panel_sites_tsv = GET_PANEL.out.panel - .map{ metaP, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [metaP, sites, tsv] + .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [metaPC, sites, tsv] } ch_panel_phased = GET_PANEL.out.panel - .map{ metaP, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [metaP, phased, p_index] + .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [metaPC, phased, p_index] } // Glimpse1 subworkflow @@ -121,20 +117,24 @@ workflow PHASEIMPUTE { ) ch_multiqc_files = ch_multiqc_files.mix(GL_INPUT.out.multiqc_files) - impute_input = GL_INPUT.out.vcf // [metaIP, vcf, index] - .map {metaIP, vcf, index -> [metaIP.subMap("panel"), metaIP, vcf, index] } + impute_input = GL_INPUT.out.vcf // [metaIPC, vcf, index] + .map {metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } .combine(ch_panel_phased, by: 0) .combine(Channel.of([[]])) - .combine(ch_region) - .combine(ch_map) + .map { metaPC, metaIPC, vcf, index, panel, p_index, sample -> + 
[metaPC.subMap("chr"), metaIPC, vcf, index, panel, p_index, sample]} + .combine(ch_region + .map {metaCR, region -> [metaCR.subMap("chr"), metaCR, region]}, + by: 0) + .combine(ch_map, by: 0) .map{ - metaP, metaIP, vcf, index, panel, p_index, sample, metaR, region, metaM, map - -> [metaIP+metaR, vcf, index, sample, region, panel, p_index, map] - } //[ metaIPR, vcf, csi, sample, region, ref, ref_index, map ] + metaC, metaIPC, vcf, index, panel, p_index, sample, metaCR, region, map + -> [metaIPC+metaCR.subMap("Region"), vcf, index, sample, region, panel, p_index, map] + } //[ metaIPCR, vcf, csi, sample, region, ref, ref_index, map ] VCF_IMPUTE_GLIMPSE(impute_input) output_glimpse1 = VCF_IMPUTE_GLIMPSE.out.merged_variants - .map{ metaIPR, vcf -> [metaIPR + [tool: "Glimpse1"], vcf] } + .map{ metaIPCR, vcf -> [metaIPCR + [tool: "Glimpse1"], vcf] } ch_impute_output = ch_impute_output.mix(output_glimpse1) } if (params.tools.contains("glimpse2")) { @@ -153,12 +153,10 @@ workflow PHASEIMPUTE { } if (params.step == 'validate') { - print("Validate imputed data") error "validate step not yet implemented" } if (params.step == 'refine') { - print("Refine imputed data") error "refine step not yet implemented" }