diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 5f653ab..fbe5de2 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,3 +1,3 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+sample,lane,project,fastq_1,fastq_2,rundir
+SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir
+SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 338d355..9fb321b 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -11,7 +11,19 @@
                 "type": "string",
                 "pattern": "^\\S+$",
                 "errorMessage": "Sample name must be provided and cannot contain spaces",
-                "meta": ["id"]
+                "meta": ["sample"]
+            },
+            "lane": {
+                "type": "integer",
+                "minimum": 0,
+                "errorMessage": "Lane ID must be a number",
+                "meta": ["lane"]
+            },
+            "project": {
+                "type": "string",
+                "pattern": "^\\S+$",
+                "errorMessage": "Project ID cannot contain spaces",
+                "meta": ["project"]
             },
             "fastq_1": {
                 "type": "string",
@@ -26,8 +38,18 @@
                 "exists": true,
                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+            },
+            "rundir": {
+                "type": "string",
+                "format": "path",
+                "exists": true,
+                "errorMessage": "Run directory must be a path",
+                "meta": ["rundir"]
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["sample", "lane", "fastq_1"],
+        "dependentRequired": {
+            "fastq_2": ["fastq_1"]
+        }
     }
 }
diff --git a/conf/test.config b/conf/test.config
index fbbffdd..38e9ee3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,7 +22,7 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input  = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv'
 
     // Genome references
     genome = 'R64-1-1'
diff --git a/main.nf b/main.nf
index 2e9b1e3..1e9c2ed 100644
--- a/main.nf
+++ b/main.nf
@@ -17,7 +17,7 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { SEQINSPECTOR  } from './workflows/seqinspector'
+include { SEQINSPECTOR            } from './workflows/seqinspector'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline'
 include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_seqinspector_pipeline'
 
@@ -32,7 +32,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqi
 // TODO nf-core: Remove this line if you don't need a FASTA file
 //   This is an example of how to use getGenomeAttribute() to fetch parameters
 //   from igenomes.config using `--genome`
-params.fasta = getGenomeAttribute('fasta')
+// params.fasta = getGenomeAttribute('fasta')
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
index 56b144f..cdbb640 100644
--- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
@@ -57,7 +57,7 @@ workflow PIPELINE_INITIALISATION {
     pre_help_text = nfCoreLogo(monochrome_logs)
     post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs)
     def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir "
-    UTILS_NFVALIDATION_PLUGIN (
+    UTILS_NFVALIDATION_PLUGIN ( // Validates parameters against $projectDir/nextflow_schema.json
         help,
         workflow_command,
         pre_help_text,
@@ -75,29 +75,32 @@ workflow PIPELINE_INITIALISATION {
     //
     // Custom validation for pipeline parameters
     //
-    validateInputParameters()
+    validateInputParameters() // Runs additional validation that is not done by $projectDir/nextflow_schema.json
 
     //
     // Create channel from input file provided through params.input
     //
     Channel
-        .fromSamplesheet("input")
+        .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json
         .map {
             meta, fastq_1, fastq_2 ->
+                def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}"
+                def updated_meta = meta + [ id: id_string ]
                 if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
+                    return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ]
                 } else {
-                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
+                    return [ updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
                 }
         }
         .groupTuple()
         .map {
-            validateInputSamplesheet(it)
-        }
-        .map {
-            meta, fastqs ->
-                return [ meta, fastqs.flatten() ]
+            validateInputSamplesheet(it) // Applies additional group validation checks that schema_input.json cannot do.
         }
+        .transpose() // Replace the map below
+        // .map {
+        //     meta, fastqs ->
+        //         return [ meta, fastqs.flatten() ]
+        // }
         .set { ch_samplesheet }
 
     emit:
@@ -151,7 +154,9 @@ workflow PIPELINE_COMPLETION {
 // Check and validate pipeline parameters
 //
 def validateInputParameters() {
-    genomeExistsError()
+    // genomeExistsError()
+
+    // TODO: Add code to further validate pipeline parameters here
 }
 
 //