diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d898aab..c61a88de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,11 +1,15 @@ name: CI on: push: + paths-ignore: + - 'docs/**' branches: - dev - main - ci pull_request: + paths-ignore: + - 'docs/**' branches: - dev - main diff --git a/assets/examples/scorefiles/PGS001229_22_duplicate.txt b/assets/examples/scorefiles/PGS001229_22_duplicate.txt new file mode 100644 index 00000000..0e1bbce8 --- /dev/null +++ b/assets/examples/scorefiles/PGS001229_22_duplicate.txt @@ -0,0 +1,13 @@ +#pgs_id=duplicatetest +#pgs_name=duplicatetest +#trait_reported=duplicate test +#genome_build=GRCh37 +rsID chr_name chr_position effect_allele other_allele effect_weight is_haplotype imputation_method locus_name variant_description +rs5746679 22 17080378 A G 1.045457e-02 False +rs2192430 22 17300230 A G 1.411475e-04 False +rs165636 22 17318864 A C 8.166266e-03 False +rs165808 22 17327595 T C 7.791641e-03 False +rs5748743 22 17409813 A G 3.108784e-04 False +rs11703655 22 17450952 G A -3.033983e-02 False +rs2192155 22 17492533 G A 3.889990e-03 False +rs2845402 22 17542810 C T 8.036290e-03 False diff --git a/conf/modules.config b/conf/modules.config index b422c3fe..102776a8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -38,8 +38,8 @@ process { ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml" ext.docker = 'ghcr.io/pgscatalog/pygscatalog' ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog' - ext.docker_version = ':pgscatalog-utils-1.2.0' - ext.singularity_version = ':pgscatalog-utils-1.2.0-singularity' + ext.docker_version = ':pgscatalog-utils-1.3.1' + ext.singularity_version = ':pgscatalog-utils-1.3.1-singularity' } withLabel: plink2 { @@ -77,9 +77,9 @@ process { withLabel: fraposa { ext.conda = "$projectDir/environments/fraposa/environment.yml" ext.singularity = 'oras://ghcr.io/pgscatalog/fraposa_pgsc' - ext.singularity_version = ':v1.0.0-singularity' + 
ext.singularity_version = ':v1.0.2-singularity' ext.docker = 'ghcr.io/pgscatalog/fraposa_pgsc' - ext.docker_version = ':v1.0.0' + ext.docker_version = ':v1.0.2' } // output configuration diff --git a/conf/test.config b/conf/test.config index e64e0d95..3079825d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,7 +21,7 @@ params { input = "$projectDir/assets/examples/samplesheet.csv" format = "csv" - scorefile = "$projectDir/assets/examples/scorefiles/PGS001229_22.txt" + scorefile = "$projectDir/assets/examples/scorefiles/PGS001229*.txt" outdir = "$projectDir/results" target_build = "GRCh37" diff --git a/environments/fraposa/environment.yml b/environments/fraposa/environment.yml index b69cb02a..4a2790f8 100644 --- a/environments/fraposa/environment.yml +++ b/environments/fraposa/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - nodefaults dependencies: - - fraposa-pgsc=1.0.0 \ No newline at end of file + - fraposa-pgsc=1.0.2 diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml index 86c3f114..550613a1 100644 --- a/environments/pgscatalog_utils/environment.yml +++ b/environments/pgscatalog_utils/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - nodefaults dependencies: - - pgscatalog-utils=1.2.0 + - pgscatalog-utils=1.3.1 diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f11034e7..47768a2f 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -73,5 +73,13 @@ class WorkflowMain { if (args[0]) { log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." } + if (profile.contains("test")) { + def test_warn = """ + | INFO: The test profile is used to install the workflow and verify the software is working correctly on your system. 
+ | INFO: Test input data and results are only useful as examples of outputs, and are not biologically meaningful. + """ + log.info test_warn.stripMargin().stripIndent() + } + } } diff --git a/modules/local/plink2_score.nf b/modules/local/plink2_score.nf index 6e250f2e..71bde2d1 100644 --- a/modules/local/plink2_score.nf +++ b/modules/local/plink2_score.nf @@ -3,7 +3,6 @@ process PLINK2_SCORE { // labels are defined in conf/modules.config label 'process_low' label 'process_long' - label 'error_retry' label 'plink2' // controls conda, docker, + singularity options tag "$meta.id chromosome $meta.chrom effect type $scoremeta.effect_type $scoremeta.n" @@ -51,7 +50,6 @@ process PLINK2_SCORE { args2 = [args2, cols, 'list-variants', no_imputation, recessive, dominant, error_on_freq_calc].join(' ') outmeta = meta + ["n": scoremeta.n, "effect_type": scoremeta.effect_type] output = "${meta.id}_${meta.chrom}_${scoremeta.effect_type}_${scoremeta.n}" - // speed up the calculation by only considering scoring-file variants for allele frequency calculation (--extract) if (scoremeta.n_scores.toInteger() == 1) """ @@ -66,6 +64,16 @@ process PLINK2_SCORE { $input ${geno.baseName} \ --out ${output} + n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ') + + if [ \$n_missing -gt 0 ] + then + echo "ERROR: \$n_missing variant(s) missing from final calculated score!" 
+ exit 1 + else + echo "INFO: Scoring file variants match listed variants in sscore.vars" + fi + cat <<-END_VERSIONS > versions.yml ${task.process.tokenize(':').last()}: plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' ) @@ -85,6 +93,16 @@ process PLINK2_SCORE { $input ${geno.baseName} \ --out ${output} + n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ') + + if [ \$n_missing -gt 0 ] + then + echo "ERROR: \$n_missing variant(s) missing from final calculated score!" + exit 1 + else + echo "INFO: Scoring file variants match listed variants in sscore.vars" + fi + cat <<-END_VERSIONS > versions.yml ${task.process.tokenize(':').last()}: plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' ) diff --git a/nextflow.config b/nextflow.config index ca1b7431..dc054608 100644 --- a/nextflow.config +++ b/nextflow.config @@ -259,7 +259,7 @@ manifest { description = 'The Polygenic Score Catalog Calculator is a nextflow pipeline for polygenic score calculation' mainScript = 'main.nf' nextflowVersion = '>=23.10.0' - version = '2.0.0-beta.2' + version = '2.0.0-beta.3' } // Load modules.config for DSL2 module specific options diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf index f8b09ad5..7b044de7 100644 --- a/subworkflows/local/ancestry/ancestry_project.nf +++ b/subworkflows/local/ancestry/ancestry_project.nf @@ -253,11 +253,6 @@ workflow ANCESTRY_PROJECT { .groupTuple() .set { ch_projections } - // projections are a mandatory output of the subworkflow - def project_fail = true - FRAPOSA_PROJECT.out.pca.subscribe onNext: { project_fail = false }, - onComplete: { projection_error(project_fail) } - emit: intersection = INTERSECT_VARIANTS.out.intersection intersect_count = INTERSECT_VARIANTS.out.intersect_count.collect() @@ -270,10 +265,3 @@ workflow ANCESTRY_PROJECT { versions = ch_versions } - -def 
projection_error(boolean fail) { - if (fail) { - log.error "ERROR: Projection subworkflow failed" - System.exit(1) - } -} diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf index 00a6fbac..6b81b70b 100644 --- a/subworkflows/local/apply_score.nf +++ b/subworkflows/local/apply_score.nf @@ -126,23 +126,11 @@ workflow APPLY_SCORE { SCORE_AGGREGATE ( ch_scores ) ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions) - // aggregated score output from this subworkflow is mandatory - def aggregate_fail = true - SCORE_AGGREGATE.out.scores.subscribe onNext: { aggregate_fail = false }, - onComplete: { aggregate_error(aggregate_fail) } - emit: versions = ch_versions scores = SCORE_AGGREGATE.out.scores } -def aggregate_error(boolean fail) { - if (fail) { - log.error "ERROR: No scores calculated!" - System.exit(1) - } -} - def annotate_scorefiles(ArrayList scorefiles) { // INPUT: // [[meta], [scorefile_1, ..., scorefile_n]] -> flat list diff --git a/subworkflows/local/match.nf b/subworkflows/local/match.nf index f1c880c0..d16c9355 100644 --- a/subworkflows/local/match.nf +++ b/subworkflows/local/match.nf @@ -60,20 +60,8 @@ workflow MATCH { MATCH_COMBINE ( ch_match_combine_input ) ch_versions = ch_versions.mix(MATCH_COMBINE.out.versions) - // mandatory output of match subworkflow - def combine_fail = true - MATCH_COMBINE.out.scorefile.subscribe onNext: { combine_fail = false }, - onComplete: { combine_error(combine_fail) } - emit: scorefiles = MATCH_COMBINE.out.scorefile db = MATCH_COMBINE.out.summary versions = ch_versions } - -def combine_error(boolean fail) { - if (fail) { - log.error "ERROR: Matching subworkflow failed" - System.exit(1) - } -} diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf index c1fe7953..5fa7ca38 100644 --- a/subworkflows/local/report.nf +++ b/subworkflows/local/report.nf @@ -88,11 +88,6 @@ workflow REPORT { SCORE_REPORT( ch_report_input, intersect_count, reference_panel_name, report_path ) 
ch_versions = ch_versions.mix(SCORE_REPORT.out.versions) - // if this workflow runs, the report must be written - report_fail = true - SCORE_REPORT.out.report.subscribe onNext: { report_fail = false }, - onComplete: { report_error(report_fail) } - emit: versions = ch_versions } @@ -100,10 +95,3 @@ workflow REPORT { def annotate_sampleset(it) { [['id': it.getName().tokenize('_')[0]], it] } - -def report_error(boolean fail) { - if (fail) { - log.error "ERROR: No results report written!" - System.exit(1) - } -} diff --git a/tests/modules/match/test.yml b/tests/modules/match/test.yml index d0c41127..e39e2b94 100644 --- a/tests/modules/match/test.yml +++ b/tests/modules/match/test.yml @@ -7,7 +7,7 @@ files: - path: output/test/match/versions.yml contains: - - "pgscatalog.match: 0.2.3" + - "pgscatalog.match: 0.3.1" # can't test IPC output (not published) - name: test match combine module @@ -20,7 +20,7 @@ files: - path: output/test/match/versions.yml contains: - - "pgscatalog.match: 0.2.3" + - "pgscatalog.match: 0.3.1" - path: output/test/match/test_ALL_additive_0.scorefile.gz contains: - "PGS001229" diff --git a/workflows/pgsc_calc.nf b/workflows/pgsc_calc.nf index 7bded6e6..361c9736 100644 --- a/workflows/pgsc_calc.nf +++ b/workflows/pgsc_calc.nf @@ -111,6 +111,12 @@ if (!run_ancestry_assign && !run_ancestry_adjust) { run_ancestry_bootstrap = false } +if (workflow.profile.contains("test")) { + if (params.run_ancestry) { + error "ERROR: The test profile isn't compatible with --run_ancestry. Please use real data." + } +} + /* ======================================================================================== IMPORT LOCAL MODULES/SUBWORKFLOWS