nf-core · maxulysse · Aug 6, 2021 · Aug 4, 2021 · Aug 4, 2021 · Aug 4, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -25,7 +25,16 @@ jobs:
         # Nextflow versions: check pipeline minimum and current latest
         nxf_ver: ['21.04.0', '']
         engine: ['docker']
-        test: ['default', 'aligner', 'gatk4_spark', 'targeted', 'skip_markduplicates', 'tumor_normal_pair', 'variant_calling', 'annotation']
+        test:
+          - 'aligner'
+          - 'annotation'
+          - 'default'
+          - 'gatk4_spark'
+          - 'save_bam_mapped'
+          - 'skip_markduplicates'
+          - 'targeted'
+          - 'tumor_normal_pair'
+          - 'variant_calling'
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

@@ -1,3 +1,2 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+patient,sample,lane,fastq_1,fastq_2
+PATIENT_ID,SAMPLE_PAIRED_END,LANE,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
@@ -7,15 +7,68 @@
     "items": {
         "type": "object",
         "properties": {
+            "patient": {
+                "type": "string",
+                "pattern": "^\\S+$",
+                "errorMessage": "Patient ID must be provided and cannot contain spaces"
+            },
             "sample": {
                 "type": "string",
                 "pattern": "^\\S+$",
                 "errorMessage": "Sample name must be provided and cannot contain spaces"
             },
+            "gender": {
+                "errorMessage": "Gender cannot contain spaces",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "status": {
+                "errorMessage": "Status can only be 0 or 1",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^(0|1)$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "lane": {
+                "errorMessage": "Lane cannot contain spaces",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+
+            },
             "fastq_1": {
-                "type": "string",
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
             },
             "fastq_2": {
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
@@ -29,11 +82,89 @@
                         "maxLength": 0
                     }
                 ]
+            },
+            "table": {
+                "errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.table$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "cram": {
+                "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.cram$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "crai": {
+                "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.crai$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "bam": {
+                "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.bam$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "bai": {
+                "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.bai$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
+            },
+            "vcf": {
+                "errorMessage": "VCF file cannot contain spaces and must have extension '.vcf' or '.vcf.gz'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.vcf(\\.gz)?$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
             }
         },
         "required": [
-            "sample",
-            "fastq_1"
+            "patient",
+            "sample"
         ]
     }
 }
@@ -43,16 +43,15 @@ params {
             publish_dir      = 'reference'
             publish_files    = false
         }
-        'bgziptabix_target_bed' {
-            suffix           = '.bed'
+        'msisensorpro_scan' {
             publish_dir      = 'reference'
             publish_files    = false
         }
-        'msisensorpro_scan' {
+        'samtools_faidx' {
             publish_dir      = 'reference'
             publish_files    = false
         }
-        'samtools_faidx' {
+        'bgziptabix_target_bed' {
             publish_dir      = 'reference'
             publish_files    = false
         }
@@ -73,11 +72,12 @@ params {
             publish_files    = false
         }
         // MAPPING
-        'seqkit_split2' {
-            args             = "--by-size ${params.split_fastq}"
+        'bwa_mem2_mem_tumor' {
+            args             = '-K 100000000 -M -B 3'
+            args2            = 'sort'
             publish_files    = false
         }
-        'bwa_mem1_mem' {
+        'bwa_mem2_mem' {
             args             = '-K 100000000 -M'
             args2            = 'sort'
             publish_files    = false
@@ -87,52 +87,52 @@ params {
             args2            = 'sort'
             publish_files    = false
         }
-        'bwa_mem2_mem' {
+        'bwa_mem1_mem' {
             args             = '-K 100000000 -M'
             args2            = 'sort'
             publish_files    = false
         }
-        'bwa_mem2_mem_tumor' {
-            args             = '-K 100000000 -M -B 3'
-            args2            = 'sort'
-            publish_files    = false
-        }
         'samtools_index_mapping' {
             publish_by_meta  = true
             publish_files    = ['bai':'mapped']
             publish_dir      = 'preprocessing'
         }
-        // MARKDUPLICATES
-        'markduplicates' {
-            args             = 'REMOVE_DUPLICATES=false VALIDATION_STRINGENCY=LENIENT'
-            suffix           = '.md'
+        'merge_bam_mapping' {
             publish_by_meta  = true
+            publish_files    = ['bam':'mapped']
             publish_dir      = 'preprocessing'
+        }
+        'seqkit_split2' {
+            args             = "--by-size ${params.split_fastq}"
             publish_files    = false
         }
-        'markduplicatesspark' {
-            args             = '--remove-sequencing-duplicates false -VS LENIENT'
+        // MARKDUPLICATES
+        'estimatelibrarycomplexity' {
+            args             = ''
             suffix           = '.md'
             publish_by_meta  = true
             publish_dir      = 'preprocessing'
-            publish_files    = ['cram': 'markduplicates', 'crai': 'markduplicates']
+            publish_files    = ['metrics': 'markduplicates']
         }
-        'estimatelibrarycomplexity' {
-            args             = ''
+        'markduplicates' {
+            args             = 'REMOVE_DUPLICATES=false VALIDATION_STRINGENCY=LENIENT'
             suffix           = '.md'
             publish_by_meta  = true
             publish_dir      = 'preprocessing'
-            publish_files    = ['metrics': 'markduplicates']
+            publish_files    = false
         }
-        'merge_bam_mapping' {
+        'markduplicatesspark' {
+            args             = '--remove-sequencing-duplicates false -VS LENIENT'
+            suffix           = '.md'
             publish_by_meta  = true
-            publish_files    = ['bam':'mapped']
             publish_dir      = 'preprocessing'
+            publish_files    = ['cram': 'markduplicates', 'crai': 'markduplicates']
         }
         'qualimap_bamqc_mapping' {
             args             = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML'
             publish_by_meta  = true
             publish_dir      = 'reports/qualimap'
+            suffix           = '.mapped'
         }
         'samtools_stats_mapping' {
             publish_by_meta  = true
@@ -184,6 +184,7 @@ params {
             args             = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML'
             publish_by_meta  = true
             publish_dir      = 'reports/qualimap'
+            suffix           = '.recal'
         }
         'samtools_index_recalibrate' {
             suffix           = 'recal'

@@ -24,7 +24,7 @@ params {
     help = false
     no_intervals = false // Intervals will be built from the fasta file
     nucleotides_per_second = 1000 // Default interval size
-    sentieon = null // Not using Sentieon by default
+    sentieon = false // Not using Sentieon by default
     skip_qc = null // All QC tools are used
     target_bed = false // No default TargetBED file for targeted sequencing
     tools = null // No default Variant_Calling or Annotation tools
@@ -37,30 +37,30 @@ params {
     three_prime_clip_r2 = 0
     trim_nextseq = 0
     save_trimmed = false
-    split_fastq = 0 // Fastq files will not be split by default
+    split_fastq = 0 // FASTQ files will not be split by default
 
     // Preprocessing
     aligner = 'bwa-mem'
     markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details
     use_gatk_spark = false // GATK Spark implementation of their tools in local mode not used by default
-    save_bam_mapped = null // Mapped BAMs not saved
+    save_bam_mapped = false // Mapped BAMs not saved
     skip_markduplicates = false // Do not skip markDuplicates by default
     sequencing_center = null // No sequencing center to be written in BAM header in MapReads process
 
     // Variant Calling
     ascat_ploidy = null // Use default value
     ascat_purity = null // Use default value
-    cf_coeff = 0.05                    // default value for Control-FREEC
-    cf_contamination = null            // by default not specified in Control-FREEC
-    cf_contamination_adjustment = null // by default we are not using this in Control-FREEC
-    cf_ploidy = 2                      // you can use 2,3,4
-    cf_window = null                   // by default we are not using this in Control-FREEC
-    generate_gvcf = null // g.vcf are not produced by HaplotypeCaller by default
-    no_strelka_bp = null // Strelka will use Manta candidateSmallIndels if available
+    cf_coeff = 0.05                     // default value for Control-FREEC
+    cf_contamination = null             // by default not specified in Control-FREEC
+    cf_contamination_adjustment = false // by default we are not using this in Control-FREEC
+    cf_ploidy = 2                       // you can use 2,3,4
+    cf_window = null                    // by default we are not using this in Control-FREEC
+    generate_gvcf = false // g.vcf are not produced by HaplotypeCaller by default
+    no_strelka_bp = false // Strelka will use Manta candidateSmallIndels if available
     pon = false // No default PON (Panel of Normals) file for GATK Mutect2 / Sentieon TNscope
     pon_tbi = false // No default PON index for GATK Mutect2 / Sentieon TNscope
-    ignore_soft_clipped_bases = null // no --dont-use-soft-clipped-bases for GATK Mutect2
-    umi = null // no umi
+    ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
+    umi = false // no umi
     read_structure1 = null // no umi
     read_structure2 = null // no umi