2.0.0-beta.3 (#349)

* bump version
* bump pgscatalog-utils version to 1.3.0
* update scoring files used in test profile
* add warnings to the test profile
* test log warning -> info
* add error for -profile test and --run_ancestry
* bump match version in test suite
* Update modules.config
* Update environment.yml
* Update test.yml
* update test profile message
* check scoring variants matches input listed variants
* fix sscore.vars path
* suppress zcat warnings
* use --force instead of --quiet to handle uncompressed data
* stop using System.exit, which is deprecated by nextflow
* make CI ignore docs
* ignore docs on PRs too
* bump fraposa version
* bump fraposa version
* drop subscribe on error checking because it hides error causes
PGScatalog · Aug 9, 2024 · 96fbb23 · 96fbb23
1 parent 60b166a
commit 96fbb23
Show file tree

Hide file tree

Showing 15 changed files with 61 additions and 60 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,11 +1,15 @@
 name: CI
 on:
   push:
+    paths-ignore:
+      - 'docs/**'
     branches:
       - dev
       - main
       - ci
   pull_request:
+    paths-ignore:
+      - 'docs/**'    
     branches:
       - dev
       - main

diff --git a/assets/examples/scorefiles/PGS001229_22_duplicate.txt b/assets/examples/scorefiles/PGS001229_22_duplicate.txt
@@ -0,0 +1,13 @@
+#pgs_id=duplicatetest
+#pgs_name=duplicatetest
+#trait_reported=duplicate test
+#genome_build=GRCh37
+rsID	chr_name	chr_position	effect_allele	other_allele	effect_weight	is_haplotype	imputation_method	locus_name	variant_description
+rs5746679	22	17080378	A	G	1.045457e-02	False			
+rs2192430	22	17300230	A	G	1.411475e-04	False			
+rs165636	22	17318864	A	C	8.166266e-03	False			
+rs165808	22	17327595	T	C	7.791641e-03	False			
+rs5748743	22	17409813	A	G	3.108784e-04	False			
+rs11703655	22	17450952	G	A	-3.033983e-02	False			
+rs2192155	22	17492533	G	A	3.889990e-03	False			
+rs2845402	22	17542810	C	T	8.036290e-03	False			
diff --git a/conf/modules.config b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pygscatalog'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog'
-        ext.docker_version = ':pgscatalog-utils-1.2.0'
-        ext.singularity_version = ':pgscatalog-utils-1.2.0-singularity'
+        ext.docker_version = ':pgscatalog-utils-1.3.1'
+        ext.singularity_version = ':pgscatalog-utils-1.3.1-singularity'
     }
 
     withLabel: plink2 {
@@ -77,9 +77,9 @@ process {
     withLabel: fraposa {
         ext.conda = "$projectDir/environments/fraposa/environment.yml"
         ext.singularity = 'oras://ghcr.io/pgscatalog/fraposa_pgsc'
-        ext.singularity_version = ':v1.0.0-singularity'
+        ext.singularity_version = ':v1.0.2-singularity'
         ext.docker = 'ghcr.io/pgscatalog/fraposa_pgsc'
-        ext.docker_version = ':v1.0.0'
+        ext.docker_version = ':v1.0.2'
     }
 
     // output configuration

diff --git a/conf/test.config b/conf/test.config
@@ -21,7 +21,7 @@ params {
 
     input  = "$projectDir/assets/examples/samplesheet.csv"
     format = "csv"
-    scorefile = "$projectDir/assets/examples/scorefiles/PGS001229_22.txt" 
+    scorefile = "$projectDir/assets/examples/scorefiles/PGS001229*.txt" 
 
     outdir = "$projectDir/results"
     target_build = "GRCh37"

diff --git a/environments/fraposa/environment.yml b/environments/fraposa/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - fraposa-pgsc=1.0.0
+  - fraposa-pgsc=1.0.2
diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - pgscatalog-utils=1.2.0
+  - pgscatalog-utils=1.3.1
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
@@ -73,5 +73,13 @@ class WorkflowMain {
         if (args[0]) {
             log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n      Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`."
         }
+        if (profile.contains("test")) {
+            def test_warn = """
+                | INFO: The test profile is used to install the workflow and verify the software is working correctly on your system.
+                | INFO: Test input data and results are only useful as examples of outputs, and are not biologically meaningful.
+            """        
+            log.info test_warn.stripMargin().stripIndent()
+        }
+
     }
 }
diff --git a/modules/local/plink2_score.nf b/modules/local/plink2_score.nf
@@ -3,7 +3,6 @@ process PLINK2_SCORE {
     // labels are defined in conf/modules.config
     label 'process_low'
     label 'process_long'
-    label 'error_retry'
     label 'plink2' // controls conda, docker, + singularity options
 
     tag "$meta.id chromosome $meta.chrom effect type $scoremeta.effect_type $scoremeta.n"
@@ -51,7 +50,6 @@ process PLINK2_SCORE {
     args2 = [args2, cols, 'list-variants', no_imputation, recessive, dominant, error_on_freq_calc].join(' ')
     outmeta = meta + ["n": scoremeta.n, "effect_type": scoremeta.effect_type]
     output = "${meta.id}_${meta.chrom}_${scoremeta.effect_type}_${scoremeta.n}"
-
     // speed up the calculation by only considering scoring-file variants for allele frequency calculation (--extract)
     if (scoremeta.n_scores.toInteger() == 1)
         """
@@ -66,6 +64,16 @@ process PLINK2_SCORE {
             $input ${geno.baseName} \
             --out ${output}
 
+        n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
+
+        if [ \$n_missing -gt 0 ]
+        then
+          echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
+          exit 1
+        else
+          echo "INFO: Scoring file variants match listed variants in sscore.vars"
+        fi
+
         cat <<-END_VERSIONS > versions.yml
         ${task.process.tokenize(':').last()}:
             plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
@@ -85,6 +93,16 @@ process PLINK2_SCORE {
             $input ${geno.baseName} \
             --out ${output}
 
+        n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
+
+        if [ \$n_missing -gt 0 ]
+        then
+          echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
+          exit 1
+        else
+          echo "INFO: Scoring file variants match listed variants in sscore.vars"
+        fi
+
         cat <<-END_VERSIONS > versions.yml
         ${task.process.tokenize(':').last()}:
             plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )

diff --git a/nextflow.config b/nextflow.config
@@ -259,7 +259,7 @@ manifest {
     description     = 'The Polygenic Score Catalog Calculator is a nextflow pipeline for polygenic score calculation'
     mainScript      = 'main.nf'
     nextflowVersion = '>=23.10.0'
-    version         = '2.0.0-beta.2'
+    version         = '2.0.0-beta.3'
 }
 
 // Load modules.config for DSL2 module specific options

diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf
@@ -253,11 +253,6 @@ workflow ANCESTRY_PROJECT {
         .groupTuple()
         .set { ch_projections }
 
-    // projections are a mandatory output of the subworkflow
-    def project_fail = true
-    FRAPOSA_PROJECT.out.pca.subscribe onNext: { project_fail = false },
-        onComplete: { projection_error(project_fail) }
-
     emit:
     intersection = INTERSECT_VARIANTS.out.intersection
     intersect_count = INTERSECT_VARIANTS.out.intersect_count.collect()
@@ -270,10 +265,3 @@ workflow ANCESTRY_PROJECT {
     versions = ch_versions
 
 }
-
-def projection_error(boolean fail) {
-    if (fail) {
-        log.error "ERROR: Projection subworkflow failed"
-        System.exit(1)
-    }
-}
diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf
@@ -126,23 +126,11 @@ workflow APPLY_SCORE {
     SCORE_AGGREGATE ( ch_scores )
     ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions)
 
-    // aggregated score output from this subworkflow is mandatory
-    def aggregate_fail = true
-    SCORE_AGGREGATE.out.scores.subscribe onNext: { aggregate_fail = false },
-      onComplete: { aggregate_error(aggregate_fail) }
-
     emit:
     versions = ch_versions
     scores = SCORE_AGGREGATE.out.scores
 }
 
-def aggregate_error(boolean fail) {
-    if (fail) {
-        log.error "ERROR: No scores calculated!"
-        System.exit(1)
-    }
-}
-
 def annotate_scorefiles(ArrayList scorefiles) {
     // INPUT:
     // [[meta], [scorefile_1, ..., scorefile_n]] -> flat list

diff --git a/subworkflows/local/match.nf b/subworkflows/local/match.nf
@@ -60,20 +60,8 @@ workflow MATCH {
      MATCH_COMBINE ( ch_match_combine_input )
      ch_versions = ch_versions.mix(MATCH_COMBINE.out.versions)
 
-    // mandatory output of match subworkflow
-    def combine_fail = true
-    MATCH_COMBINE.out.scorefile.subscribe onNext: { combine_fail = false },
-        onComplete: { combine_error(combine_fail) }
-
     emit:
     scorefiles = MATCH_COMBINE.out.scorefile
     db         = MATCH_COMBINE.out.summary
     versions   = ch_versions
 }
-
-def combine_error(boolean fail) {
-    if (fail) {
-        log.error "ERROR: Matching subworkflow failed"
-        System.exit(1)
-    }
-}
diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
@@ -88,22 +88,10 @@ workflow REPORT {
     SCORE_REPORT( ch_report_input, intersect_count, reference_panel_name, report_path )
     ch_versions = ch_versions.mix(SCORE_REPORT.out.versions)
 
-    // if this workflow runs, the report must be written
-    report_fail = true
-    SCORE_REPORT.out.report.subscribe onNext: { report_fail = false },
-        onComplete: { report_error(report_fail) }
-
     emit:
     versions = ch_versions
 }
 
 def annotate_sampleset(it) {
     [['id': it.getName().tokenize('_')[0]], it]
 }
-
-def report_error(boolean fail) {
-    if (fail) {
-        log.error "ERROR: No results report written!"
-        System.exit(1)
-    }
-}
diff --git a/tests/modules/match/test.yml b/tests/modules/match/test.yml
@@ -7,7 +7,7 @@
   files:
     - path: output/test/match/versions.yml
       contains:
-        - "pgscatalog.match: 0.2.3"
+        - "pgscatalog.match: 0.3.1"
 # can't test IPC output (not published)        
 
 - name: test match combine module
@@ -20,7 +20,7 @@
   files:
     - path: output/test/match/versions.yml
       contains:
-        - "pgscatalog.match: 0.2.3"
+        - "pgscatalog.match: 0.3.1"
     - path: output/test/match/test_ALL_additive_0.scorefile.gz           
       contains:
         - "PGS001229"     

diff --git a/workflows/pgsc_calc.nf b/workflows/pgsc_calc.nf
@@ -111,6 +111,12 @@ if (!run_ancestry_assign && !run_ancestry_adjust) {
     run_ancestry_bootstrap = false
 }
 
+if (workflow.profile.contains("test")) {
+    if (params.run_ancestry) {
+        error "ERROR: The test profile isn't compatible with --run_ancestry. Please use real data."
+    }
+}
+
 /*
 ========================================================================================
     IMPORT LOCAL MODULES/SUBWORKFLOWS