From 2a17b3e6d245d7f2a900f031ab10ec6aacda35c0 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Sun, 26 Mar 2023 10:36:52 +0100
Subject: [PATCH 001/113] minor changes in the CHANGELOG.md

---
 CHANGELOG.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cefd7e85..39182abc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,14 +3,21 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [1.2.0] nfcore/quantms - [TBD] - Thimphu
+## [1.1.1] nfcore/quantms - [TBD] - Berlin

 ### `Added`

+- [#92](https://github.com/nf-core/quantms/pull/92) Improved output docs for mzTab
+- [#91](https://github.com/nf-core/quantms/pull/91) Added dev profile for nightly versions of OpenMS tools
+
 ### `Changed`

+- [#88](https://github.com/nf-core/quantms/pull/88) Updated Comet version to latest release (2023010)
+
 ### `Fixed`

+- [#93](https://github.com/nf-core/quantms/pull/93) Fixed bug in docker vs. singularity container logic in some processes.
+
 ### `Dependencies`

 ### `Parameters`

From 72b4b6b76dd49b4d382b5be8248b7b9d4ac76592 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Sun, 26 Mar 2023 10:37:32 +0100
Subject: [PATCH 002/113] minor changes in the CHANGELOG.md

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 98ab6da9..aafcb937 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -376,7 +376,7 @@ manifest {
     description     = """Quantitative Mass Spectrometry nf-core workflow"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=22.10.1'
-    version         = '1.2.0dev'
+    version         = '1.1.1dev'
     doi             = '10.5281/zenodo.7754148'
 }

From c4b17e4ff5fc60258ab1ba55b5bfc48b771df02e Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Thu, 3 Aug 2023 14:31:21 -0700
Subject: [PATCH 003/113] added tdf2mzml

---
 .../local/diann_preliminary_analysis/main.nf |  2 +-
 modules/local/tdf2mzml/main.nf               | 92 +++++++++++++++++++
 modules/local/tdf2mzml/meta.yml              | 42 +++++++++
 subworkflows/local/file_preparation.nf       | 24 ++++-
 4 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 modules/local/tdf2mzml/main.nf
 create mode 100644 modules/local/tdf2mzml/meta.yml

diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf
index f3565139..05a545f1 100644
--- a/modules/local/diann_preliminary_analysis/main.nf
+++ b/modules/local/diann_preliminary_analysis/main.nf
@@ -7,7 +7,7 @@ process DIANN_PRELIMINARY_ANALYSIS {
         'biocontainers/diann:v1.8.1_cv1' }"

     input:
-    tuple val(meta), file(mzML), file(predict_tsv)
+    tuple val(meta), path(mzML), path(predict_tsv)

     output:
     path "*.quant", emit: diann_quant

diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf
new file mode 100644
index 00000000..b6d0c556
--- /dev/null
+++ b/modules/local/tdf2mzml/main.nf
@@ -0,0 +1,92 @@
+
+// process TDF2MZML {
+//     publishDir "${params.mzml_dir}/${outputDir}", pattern: "*.mzML.gz", failOnError: true
+//     container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use...
+//     label 'process_single'
+//     label 'error_retry'
+//
+//     input:
+//     tuple val(file_id), path(tdf_input), val(outputDir)
+//
+//     output:
+//     tuple val(file_id), path("${file(tdf_input.baseName).baseName}.mzML.gz")
+//
+//     script:
+//     """
+//     echo "Unpacking..."
+//     tar -xvf ${tdf_input}
+//     echo "Converting..."
+//     tdf2mzml.py -i *.d  # --ms1_type "centroid"
+//     echo "Compressing..."
+//     mv *.mzml ${file(tdf_input.baseName).baseName}.mzML
+//     gzip ${file(tdf_input.baseName).baseName}.mzML
+//     """
+//
+//     stub:
+//     """
+//     touch ${file(tdf_input.baseName).baseName}.mzML.gz
+//     """
+// }
+
+
+process TDF2MZML {
+    tag "$meta.mzml_id"
+    label 'process_low'
+    label 'process_single'
+    label 'error_retry'

+    // conda "conda-forge::mono bioconda::thermorawfileparser=1.3.4"
+    // conda is not enabled for DIA so ... disabling anyway
+    // container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    //     'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.3.4--ha8f3691_0' :
+    //     'quay.io/biocontainers/thermorawfileparser:1.3.4--ha8f3691_0' }"
+    // TODO add support for singularity ...
+    container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use...
+
+    stageInMode {
+        if (task.attempt == 1) {
+            if (executor == "awsbatch") {
+                'symlink'
+            } else {
+                'link'
+            }
+        } else if (task.attempt == 2) {
+            if (executor == "awsbatch") {
+                'copy'
+            } else {
+                'symlink'
+            }
+        } else {
+            'copy'
+        }
+    }
+
+    input:
+    tuple val(meta), path(rawfile)
+
+    output:
+    tuple val(meta), path("*.mzML"), emit: mzmls_converted
+    path "*.d", emit: dotd_files
+    path "versions.yml", emit: version
+    path "*.log", emit: log
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}"
+
+    """
+    echo "Unpacking..." | tee --append ${rawfile.baseName}_conversion.log
+    tar -xvf ${rawfile} 2>&1 | tee --append ${rawfile.baseName}_conversion.log
+    echo "Converting..." | tee --append ${rawfile.baseName}_conversion.log
+    tdf2mzml.py -i *.d 2>&1 | tee --append ${rawfile.baseName}_conversion.log
+    echo "Compressing..." | tee --append ${rawfile.baseName}_conversion.log
+    mv *.mzml ${file(rawfile.baseName).baseName}.mzML
+    // gzip ${file(rawfile.baseName).baseName}.mzML
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tar: \$(tar --version)
+        tdf2mzml.py: \$(tdf2mzml.py --version)
+    END_VERSIONS
+    """
+}

diff --git a/modules/local/tdf2mzml/meta.yml b/modules/local/tdf2mzml/meta.yml
new file mode 100644
index 00000000..4bbf8e3f
--- /dev/null
+++ b/modules/local/tdf2mzml/meta.yml
@@ -0,0 +1,42 @@
+name: tdf2mzml
+description: convert raw bruker files to mzml files
+keywords:
+  - raw
+  - mzML
+  - .d
+tools:
+  - tdf2mzml:
+      description: |
+        It takes a bruker .d raw file as input and outputs indexed mzML
+      homepage: https://github.com/mafreitas/tdf2mzml
+      documentation: https://github.com/mafreitas/tdf2mzml
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+  - rawfile:
+      type: file
+      description: |
+        Bruker Raw file archived using tar
+      pattern: "*.d.tar"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'MD5', enzyme:trypsin ]
+  - mzml:
+      type: file
+      description: indexed mzML
+      pattern: "*.mzML"
+  - log:
+      type: file
+      description: log file
+      pattern: "*.log"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@jspaezp"

diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf
index 09bce578..21bca2b7 100644
--- a/subworkflows/local/file_preparation.nf
+++ b/subworkflows/local/file_preparation.nf
@@ -9,7 +9,7 @@ include { OPENMSPEAKPICKER } from '../../modules/local/openms/openmspeakpicke

 workflow FILE_PREPARATION {
     take:
-    ch_mzmls // channel: [ val(meta), raw/mzml ]
+    ch_mzmls // channel: [ val(meta), raw/mzml/d.tar ]

     main:
     ch_versions = Channel.empty()

         .branch {
             raw: WorkflowQuantms.hasExtension(it[1], 'raw')
             mzML: WorkflowQuantms.hasExtension(it[1], 'mzML')
+            dotD: WorkflowQuantms.hasExtension(it[1], '.d.tar')
         }
         .set { ch_branched_input }

     ch_results = ch_results.mix(ch_branched_input_mzMLs.inputIndexedMzML)

     THERMORAWFILEPARSER( ch_branched_input.raw )
+    // Output is
+    // {'mzmls_converted': Tuple[val(meta), path(mzml)],
+    //  'version': Path(versions.yml),
+    //  'log': Path(*.txt)}
+
+    // Where meta is the same as the input meta
     ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.version)
     ch_results = ch_results.mix(THERMORAWFILEPARSER.out.mzmls_converted)

     ch_versions = ch_versions.mix(MZMLINDEXING.out.version)
     ch_results = ch_results.mix(MZMLINDEXING.out.mzmls_indexed)

-    ch_results.map{ it -> [it[0], it[1]] }.set{ ch_mzml }
+    ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle }
+
+    TDF2MZML( ch_branched_input.dotD )
+    ch_versions = ch_versions.mix(TDF2MZML.out.version)
+    indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted)
+    ch_results = indexed_mzml_bundle.mix(TDF2MZML.out.dotd_files)
+    // todo... evaluate if the .mzml is used explicitly anywhere else downstream

-    MZMLSTATISTICS( ch_mzml )
+    MZMLSTATISTICS( indexed_mzml_bundle )
     ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.mzml_statistics.collect())
     ch_versions = ch_versions.mix(MZMLSTATISTICS.out.version)

     if (params.openms_peakpicking){
+        // If the peak picker is enabled, it will over-write not bypass the .d files
         OPENMSPEAKPICKER (
-            ch_results
+            indexed_mzml_bundle
         )

     emit:
-    results = ch_results // channel: [val(mzml_id), indexedmzml]
+    results = ch_results // channel: [val(mzml_id), indexedmzml|.d.tar]
     statistics = ch_statistics // channel: [ *_mzml_info.tsv ]
     version = ch_versions // channel: [ *.version.txt ]
 }

From 0b555036ba39534d065e1713aac55d4d78bb3133 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Thu, 3 Aug 2023 16:27:12 -0700
Subject: [PATCH 004/113] added path to conversion

---
 subworkflows/local/file_preparation.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf
index 21bca2b7..ca660c1b 100644
--- a/subworkflows/local/file_preparation.nf
+++ b/subworkflows/local/file_preparation.nf
@@ -3,6 +3,7 @@
 //

 include { THERMORAWFILEPARSER } from '../../modules/local/thermorawfileparser/main'
+include { TDF2MZML } from '../../modules/local/tdf2mzml/main'
 include { MZMLINDEXING } from '../../modules/local/openms/mzmlindexing/main'
 include { MZMLSTATISTICS } from '../../modules/local/mzmlstatistics/main'
 include { OPENMSPEAKPICKER } from '../../modules/local/openms/openmspeakpicker/main'

From 4e9a931e6da19dde359303e8b84cba85dfffeb59 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Thu, 3 Aug 2023 18:02:43 -0700
Subject: [PATCH 005/113] changed comment character

---
 modules/local/tdf2mzml/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf
index b6d0c556..caf9dfac 100644
--- a/modules/local/tdf2mzml/main.nf
+++ b/modules/local/tdf2mzml/main.nf
@@ -81,7 +81,7 @@ process TDF2MZML {
     tdf2mzml.py -i *.d 2>&1 | tee --append ${rawfile.baseName}_conversion.log
     echo "Compressing..." | tee --append ${rawfile.baseName}_conversion.log
     mv *.mzml ${file(rawfile.baseName).baseName}.mzML
-    // gzip ${file(rawfile.baseName).baseName}.mzML
+    # gzip ${file(rawfile.baseName).baseName}.mzML

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 51a6e6626f708b303566ba4fee9d32fc359f30b9 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Thu, 3 Aug 2023 18:39:15 -0700
Subject: [PATCH 006/113] added tuple of meta to tdf2mzml outs

---
 modules/local/tdf2mzml/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf
index caf9dfac..7e716f28 100644
--- a/modules/local/tdf2mzml/main.nf
+++ b/modules/local/tdf2mzml/main.nf
@@ -66,7 +66,7 @@ process TDF2MZML {

     output:
     tuple val(meta), path("*.mzML"), emit: mzmls_converted
-    path "*.d", emit: dotd_files
+    tuple val(meta), path("*.d"), emit: dotd_files
     path "versions.yml", emit: version
     path "*.log", emit: log

From 9cc93fa9a760c100b9d03f6ffe1ef7f2a121b5bf Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Fri, 4 Aug 2023 06:14:37 -0700
Subject: [PATCH 007/113] added debug prints to diann conversion

---
 bin/diann_convert.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bin/diann_convert.py b/bin/diann_convert.py
index 257d5e01..15af9f8a 100755
--- a/bin/diann_convert.py
+++ b/bin/diann_convert.py
@@ -148,6 +148,17 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv
     out_msstats.loc[:, "IsotopeLabelType"] = "L"
     out_msstats["Reference"] = out_msstats.apply(lambda x: os.path.basename(x["Reference"]), axis=1)

+    # TODO remove this if not debugging
+    print("\n\nout_msstats >>>")
+    print(out_msstats.head(5))
+
+    print("\n\nf_table >>>")
+    print(f_table.head(5))
+
+    print("\n\ns_DataFrame >>>")
+    print(s_DataFrame.head(5))
+    ## END TODO
+
     out_msstats[["Fraction", "BioReplicate", "Condition"]] = out_msstats.apply(
         lambda x: query_expdesign_value(x["Run"], f_table, s_DataFrame), axis=1, result_type="expand"
     )
Sebastian Paez" Date: Fri, 4 Aug 2023 07:57:11 -0700 Subject: [PATCH 008/113] added renaming of dotd files after extraction --- modules/local/tdf2mzml/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf index 7e716f28..5d5bb1f7 100644 --- a/modules/local/tdf2mzml/main.nf +++ b/modules/local/tdf2mzml/main.nf @@ -81,6 +81,7 @@ process TDF2MZML { tdf2mzml.py -i *.d 2>&1 | tee --append ${rawfile.baseName}_conversion.log echo "Compressing..." | tee --append ${rawfile.baseName}_conversion.log mv *.mzml ${file(rawfile.baseName).baseName}.mzML + mv *.d ${file(rawfile.baseName).baseName}.d # gzip ${file(rawfile.baseName).baseName}.mzML cat <<-END_VERSIONS > versions.yml From b559b211fb39a488ff4e0e41a2b890558a747cf4 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 4 Aug 2023 11:26:53 -0700 Subject: [PATCH 009/113] yet more debug printing info --- bin/diann_convert.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 15af9f8a..e5a18be7 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -13,6 +13,10 @@ import pandas as pd from pyopenms import AASequence, FASTAFile, ModificationsDB +pd.set_option('display.max_rows', 500) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) + CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -149,6 +153,9 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv out_msstats["Reference"] = out_msstats.apply(lambda x: os.path.basename(x["Reference"]), axis=1) # TODO remove this if not debugging + print("\n\nReference Column >>>") + print(out_msstats["Reference"]) + print("\n\nout_msstats >>>") print(out_msstats.head(5)) @@ -239,6 +246,17 @@ def query_expdesign_value(reference, f_table, s_table): :return: A tuple contains Fraction, BioReplicate and Condition :rtype: tuple """ + # TODO remove this if not debugging + print("\n\nreference >>>") + print(reference) + + print("\n\nf_table >>>") + print(f_table.head(5)) + + print("\n\ns_table >>>") + print(s_table.head(5)) + # END TODO + query_reference = f_table[f_table["run"] == reference] Fraction = query_reference["Fraction"].values[0] row = s_table[s_table["Sample"] == query_reference["Sample"].values[0]] From 0793038ecc23a4364dc208ab1b8b53b87d49e6a2 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 4 Aug 2023 12:50:06 -0700 Subject: [PATCH 010/113] added not to branching --- workflows/dia.nf | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/workflows/dia.nf b/workflows/dia.nf index feb08918..de1b4035 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -55,12 +55,18 @@ workflow DIA { // // MODULE: SILICOLIBRARYGENERATION // - SILICOLIBRARYGENERATION(ch_searchdb, DIANNCFG.out.diann_cfg) + if (!params.diann_speclib) { + SILICOLIBRARYGENERATION(ch_searchdb, DIANNCFG.out.diann_cfg) + speclib = SILICOLIBRARYGENERATION.out.predict_speclib + } else { + speclib = Channel.fromPath(params.diann_speclib) + } + // // MODULE: DIANN_PRELIMINARY_ANALYSIS // - DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(SILICOLIBRARYGENERATION.out.predict_speclib)) + DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib)) ch_software_versions = ch_software_versions.mix(DIANN_PRELIMINARY_ANALYSIS.out.version.ifEmpty(null)) // @@ -69,7 +75,7 @@ workflow DIA { ASSEMBLE_EMPIRICAL_LIBRARY(ch_result.mzml.collect(), meta, DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(), - SILICOLIBRARYGENERATION.out.predict_speclib + speclib ) ch_software_versions = ch_software_versions.mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.version.ifEmpty(null)) From 224e340df5d532fcfcd657b75a2740a9163f36df Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 5 Aug 2023 19:03:24 -0700 Subject: [PATCH 011/113] refactoring of diann convert --- bin/diann_convert.py | 336 ++++++++++++++++++------------ modules/local/sdrfparsing/main.nf | 10 +- 2 files changed, 207 insertions(+), 139 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index e5a18be7..bb3d8963 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -3,19 +3,24 @@ This script converts the output from DIA-NN into three standard formats: MSstats, Triqler and mzTab. License: Apache 2.0 Authors: Hong Wong, Yasset Perez-Riverol +Revisions: + 2023-Aug-05: J. 
Sebastian Paez """ import logging import os import re +from dataclasses import dataclass +from pathlib import Path +from typing import Any, List import click import numpy as np import pandas as pd from pyopenms import AASequence, FASTAFile, ModificationsDB -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -43,10 +48,10 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv the DiaNN main report, protein matrix, precursor matrix, experimental design file, protein sequence FASTA file, version file of DiaNN and mzml_info TSVs :type folder: str - :param diann_version: Path to a version file of DIA-NN - :type diann_version: str :param dia_params: A list contains DIA parameters :type dia_params: list + :param diann_version: Path to a version file of DIA-NN + :type diann_version: str :param charge: The charge assigned by DIA-NN(max_precursor_charge) :type charge: int :param missed_cleavages: Allowed missed cleavages assigned by DIA-NN @@ -54,94 +59,21 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv :param qvalue_threshold: Threshold for filtering q value :type qvalue_threshold: float """ - pathdict = {key: [] for key in ["report", "exp_design", "pg_matrix", "pr_matrix", "fasta", "mzml_info"]} - fileslist = os.listdir(folder) - if not folder.endswith("/"): - folder = folder + "/" - for i in fileslist: - if i.endswith("report.tsv"): - pathdict["report"].append(i) - elif i.endswith("_openms_design.tsv"): - pathdict["exp_design"].append(i) - elif i.endswith("pg_matrix.tsv"): - pathdict["pg_matrix"].append(i) - elif i.endswith("pr_matrix.tsv"): - pathdict["pr_matrix"].append(i) - elif i.endswith(".fasta") or i.endswith(".fa"): - pathdict["fasta"].append(i) - elif i.endswith("mzml_info.tsv"): - pathdict["mzml_info"].append(i) - else: - pass - - for item in pathdict.items(): - if item[0] != "mzml_info" and len(item[1]) > 1: - logging.error(f"{item[0]} is duplicate, check whether the file is redundant or change the file name!") - - diann_report = folder + pathdict["report"][0] - exp_design = folder + pathdict["exp_design"][0] - pg_matrix = folder + pathdict["pg_matrix"][0] - pr_matrix = folder + pathdict["pr_matrix"][0] - fasta = folder + pathdict["fasta"][0] - diann_version_file = diann_version - - with open(diann_version_file) as f: - for line in f: - if "DIA-NN" in line: - diann_version_id = line.rstrip("\n").split(": ")[1] - break - - remain_cols = [ - "File.Name", - "Run", - "Protein.Group", + diann_directory = DiannDirectory(folder, diann_version_file=diann_version) + report = diann_directory.main_report_df(qvalue_threshold=qvalue_threshold) + s_DataFrame, f_table = diann_directory.exp_design_dfs() + + # Convert to MSstats + msstats_columns_keep = [ "Protein.Names", - "Protein.Ids", - "First.Protein.Description", - "PG.MaxLFQ", - "RT.Start", - "Global.Q.Value", - "Lib.Q.Value", - "PEP", - "Precursor.Normalised", - "Precursor.Id", - "Q.Value", "Modified.Sequence", - "Stripped.Sequence", "Precursor.Charge", "Precursor.Quantity", - "Global.PG.Q.Value", + "File.Name", + "Run", ] - report = pd.read_csv(diann_report, sep="\t", header=0, usecols=remain_cols) - # filter based on qvalue parameter for downstream analysiss - report = report[report["Q.Value"] < qvalue_threshold] - 
report["Calculate.Precursor.Mz"] = report.apply( - lambda x: calculate_mz(x["Stripped.Sequence"], x["Precursor.Charge"]), axis=1 - ) - - precursor_list = list(report["Precursor.Id"].unique()) - report["precursor.Index"] = report.apply(lambda x: precursor_list.index(x["Precursor.Id"]), axis=1) - - with open(exp_design, "r") as f: - data = f.readlines() - empty_row = data.index("\n") - f_table = [i.replace("\n", "").split("\t") for i in data[1:empty_row]] - f_header = data[0].replace("\n", "").split("\t") - f_table = pd.DataFrame(f_table, columns=f_header) - f_table.loc[:, "run"] = f_table.apply( - lambda x: os.path.splitext(os.path.basename(x["Spectra_Filepath"]))[0], axis=1 - ) - - s_table = [i.replace("\n", "").split("\t") for i in data[empty_row + 1 :]][1:] - s_header = data[empty_row + 1].replace("\n", "").split("\t") - s_DataFrame = pd.DataFrame(s_table, columns=s_header) - - # Convert to MSstats - out_msstats = pd.DataFrame() - out_msstats = report[ - ["Protein.Names", "Modified.Sequence", "Precursor.Charge", "Precursor.Quantity", "File.Name", "Run"] - ] + out_msstats = report[msstats_columns_keep] out_msstats.columns = ["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Reference", "Run"] out_msstats = out_msstats[out_msstats["Intensity"] != 0] out_msstats.loc[:, "PeptideSequence"] = out_msstats.apply( @@ -169,73 +101,200 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv out_msstats[["Fraction", "BioReplicate", "Condition"]] = out_msstats.apply( lambda x: query_expdesign_value(x["Run"], f_table, s_DataFrame), axis=1, result_type="expand" ) - out_msstats.to_csv(os.path.splitext(os.path.basename(exp_design))[0] + "_msstats_in.csv", sep=",", index=False) + exp_out_prefix = str(Path(diann_directory.exp_design).stem) + out_msstats.to_csv(exp_out_prefix + "_msstats_in.csv", sep=",", index=False) # Convert to Triqler - out_triqler = pd.DataFrame() - out_triqler = out_msstats[["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Run", "Condition"]] + trinqler_cols = ["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Run", "Condition"] + out_triqler = out_msstats[trinqler_cols] del out_msstats out_triqler.columns = ["proteins", "peptide", "charge", "intensity", "run", "condition"] out_triqler = out_triqler[out_triqler["intensity"] != 0] out_triqler.loc[:, "searchScore"] = report["Q.Value"] out_triqler.loc[:, "searchScore"] = 1 - out_triqler["searchScore"] - out_triqler.to_csv(os.path.splitext(os.path.basename(exp_design))[0] + "_triqler_in.tsv", sep="\t", index=False) + out_triqler.to_csv(exp_out_prefix + "_triqler_in.tsv", sep="\t", index=False) del out_triqler # Convert to mzTab - if diann_version_id == "1.8.1": - fasta_df = pd.DataFrame() - entries = [] - f = FASTAFile() - f.load(fasta, entries) - line = 0 - for e in entries: - fasta_df.loc[line, "id"] = e.identifier - fasta_df.loc[line, "seq"] = e.sequence - fasta_df.loc[line, "len"] = len(e.sequence) - line += 1 - - index_ref = f_table - index_ref.loc[:, "ms_run"] = index_ref.apply(lambda x: x["Fraction_Group"], axis=1) - index_ref.loc[:, "study_variable"] = index_ref.apply(lambda x: x["Sample"], axis=1) - index_ref.loc[:, "ms_run"] = index_ref.loc[:, "ms_run"].astype("int") - index_ref.loc[:, "study_variable"] = index_ref.loc[:, "study_variable"].astype("int") - report[["ms_run", "study_variable"]] = report.apply( - lambda x: add_info(x["Run"], index_ref), axis=1, result_type="expand" - ) + diann_directory.convert_to_mztab( + report=report, f_table=f_table, 
charge=charge, missed_cleavages=missed_cleavages, dia_params=dia_params + ) - (MTD, database) = mztab_MTD(index_ref, dia_params, fasta, charge, missed_cleavages) - pg = pd.read_csv( - pg_matrix, - sep="\t", - header=0, - ) - PRH = mztab_PRH(report, pg, index_ref, database, fasta_df) - del pg - pr = pd.read_csv( - pr_matrix, - sep="\t", - header=0, + +@dataclass +class DiannDirectory: + base_path: os.PathLike + diann_version_file: str + + def __post_init__(self): + self.base_path = Path(self.base_path) + if not self.base_path.exists() and not self.base_path.is_dir(): + raise NotADirectoryError(f"Path {self.base_path} does not exist") + self.diann_version_file = Path(self.diann_version_file) + if not self.diann_version_file.is_file(): + raise FileNotFoundError(f"Path {self.diann_version_file} does not exist") + + def find_suffix_file(self, suffix: str, only_first=True) -> os.PathLike: + matching = self.base_path.glob(f"**/*{suffix}") + if only_first: + try: + return next(matching) + except StopIteration: + raise FileNotFoundError(f"Could not find file with suffix {suffix}") + else: + out = list(matching) + if len(out) == 0: + raise FileNotFoundError(f"Could not find file with suffix {suffix}") + else: + return out + + @property + def report(self) -> os.PathLike: + return self.find_suffix_file("report.tsv") + + @property + def exp_design(self) -> os.PathLike: + return self.find_suffix_file("_openms_design.tsv") + + def exp_design_dfs(self): + with open(self.exp_design, "r") as f: + data = f.readlines() + empty_row = data.index("\n") + f_table = [i.replace("\n", "").split("\t") for i in data[1:empty_row]] + f_header = data[0].replace("\n", "").split("\t") + f_table = pd.DataFrame(f_table, columns=f_header) + f_table.loc[:, "run"] = f_table.apply( + lambda x: os.path.splitext(os.path.basename(x["Spectra_Filepath"]))[0], axis=1 + ) + + s_table = [i.replace("\n", "").split("\t") for i in data[empty_row + 1 :]][1:] + s_header = data[empty_row + 1].replace("\n", "").split("\t") + s_DataFrame = pd.DataFrame(s_table, columns=s_header) + + return s_DataFrame, f_table + + @property + def pg_matrix(self) -> os.PathLike: + return self.find_suffix_file("pg_matrix.tsv") + + @property + def pr_matrix(self) -> os.PathLike: + return self.find_suffix_file("pr_matrix.tsv") + + @property + def fasta(self) -> os.PathLike: + try: + return self.find_suffix_file(".fasta") + except FileNotFoundError: + return self.find_suffix_file(".fa") + + @property + def mzml_info(self) -> os.PathLike: + return self.find_suffix_file("mzml_info.tsv") + + @property + def diann_version(self) -> str: + with open(self.diann_version_file) as f: + for line in f: + if "DIA-NN" in line: + diann_version_id = line.rstrip("\n").split(": ")[1] + return diann_version_id + + def convert_to_mztab(self, report, f_table, charge: int, missed_cleavages: int, dia_params: List[Any]) -> None: + # Convert to mzTab + if self.diann_version == "1.8.1": + fasta_df = pd.DataFrame() + entries = [] + f = FASTAFile() + f.load(self.fasta, entries) + line = 0 + for e in entries: + fasta_df.loc[line, "id"] = e.identifier + fasta_df.loc[line, "seq"] = e.sequence + fasta_df.loc[line, "len"] = len(e.sequence) + line += 1 + + index_ref = f_table + index_ref.loc[:, "ms_run"] = index_ref.apply(lambda x: x["Fraction_Group"], axis=1) + index_ref.loc[:, "study_variable"] = index_ref.apply(lambda x: x["Sample"], axis=1) + index_ref.loc[:, "ms_run"] = index_ref.loc[:, "ms_run"].astype("int") + index_ref.loc[:, "study_variable"] = index_ref.loc[:, 
"study_variable"].astype("int") + report[["ms_run", "study_variable"]] = report.apply( + lambda x: add_info(x["Run"], index_ref), axis=1, result_type="expand" + ) + + (MTD, database) = mztab_MTD(index_ref, dia_params, str(self.fasta), charge, missed_cleavages) + pg = pd.read_csv( + self.pg_matrix, + sep="\t", + header=0, + ) + PRH = mztab_PRH(report, pg, index_ref, database, fasta_df) + del pg + pr = pd.read_csv( + self.pr_matrix, + sep="\t", + header=0, + ) + precursor_list = list(report["Precursor.Id"].unique()) + PEH = mztab_PEH(report, pr, precursor_list, index_ref, database) + del pr + PSH = mztab_PSH(report, str(self.base_path), database) + del report + MTD.loc["", :] = "" + PRH.loc[len(PRH) + 1, :] = "" + PEH.loc[len(PEH) + 1, :] = "" + out_basename = Path(self.exp_design).stem + with open(out_basename + "_out.mzTab", "w", newline="") as f: + MTD.to_csv(f, mode="w", sep="\t", index=False, header=False) + PRH.to_csv(f, mode="w", sep="\t", index=False, header=True) + PEH.to_csv(f, mode="w", sep="\t", index=False, header=True) + PSH.to_csv(f, mode="w", sep="\t", index=False, header=True) + + logging.info(f"mzTab file generated successfully! at {out_basename}_out.mzTab") + else: + # Maybe this error should be detected beforehand to save time ... + raise ValueError(f"Unsupported DIANN version {self.diann_version}") + + def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: + remain_cols = [ + "File.Name", + "Run", + "Protein.Group", + "Protein.Names", + "Protein.Ids", + "First.Protein.Description", + "PG.MaxLFQ", + "RT.Start", + "Global.Q.Value", + "Lib.Q.Value", + "PEP", + "Precursor.Normalised", + "Precursor.Id", + "Q.Value", + "Modified.Sequence", + "Stripped.Sequence", + "Precursor.Charge", + "Precursor.Quantity", + "Global.PG.Q.Value", + ] + report = pd.read_csv(self.report, sep="\t", header=0, usecols=remain_cols) + + # filter based on qvalue parameter for downstream analysiss + report = report[report["Q.Value"] < qvalue_threshold] + report["Calculate.Precursor.Mz"] = report.apply( + lambda x: calculate_mz(x["Stripped.Sequence"], x["Precursor.Charge"]), axis=1 ) - PEH = mztab_PEH(report, pr, precursor_list, index_ref, database) - del pr - PSH = mztab_PSH(report, folder, database) - del report - MTD.loc["", :] = "" - PRH.loc[len(PRH) + 1, :] = "" - PEH.loc[len(PEH) + 1, :] = "" - with open(os.path.splitext(os.path.basename(exp_design))[0] + "_out.mzTab", "w", newline="") as f: - MTD.to_csv(f, mode="w", sep="\t", index=False, header=False) - PRH.to_csv(f, mode="w", sep="\t", index=False, header=True) - PEH.to_csv(f, mode="w", sep="\t", index=False, header=True) - PSH.to_csv(f, mode="w", sep="\t", index=False, header=True) + + precursor_list = list(report["Precursor.Id"].unique()) + report["precursor.Index"] = report.apply(lambda x: precursor_list.index(x["Precursor.Id"]), axis=1) def query_expdesign_value(reference, f_table, s_table): """ - By matching the "Run" column in f_table or the "Sample" column in s_table, this function returns a tuple containing Fraction, - BioReplicate and Condition. + By matching the "Run" column in f_table or the "Sample" column in s_table, this function + returns a tuple containing Fraction, BioReplicate and Condition. 
:param reference: The value of "Run" column in out_msstats :type reference: str @@ -257,6 +316,9 @@ def query_expdesign_value(reference, f_table, s_table): print(s_table.head(5)) # END TODO + if reference not in f_table["run"].values: + raise ValueError(f"Reference {reference} not found in f_table;" f" values are {set(f_table['run'].values)}") + query_reference = f_table[f_table["run"] == reference] Fraction = query_reference["Fraction"].values[0] row = s_table[s_table["Sample"] == query_reference["Sample"].values[0]] diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 429994c8..6803f3a4 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -23,12 +23,18 @@ process SDRFPARSING { """ ## -t2 since the one-table format parser is broken in OpenMS2.5 ## -l for legacy behavior to always add sample columns - ## TODO Update the sdrf-pipelines to dynamic print versions - parse_sdrf convert-openms -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log + ## JSPP 2023-Aug -- Adding --raw for now, this will allow the development of the + # bypass diann pipelie but break every other aspect of it. Make sure + # this flag is gone when PRing + + parse_sdrf convert-openms --raw -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log mv openms.tsv ${sdrf.baseName}_config.tsv mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv + ## TODO Update the sdrf-pipelines to dynamic print versions + # Version reporting can now be programmatic, since: + # https://github.com/bigbio/sdrf-pipelines/pull/134 cat <<-END_VERSIONS > versions.yml "${task.process}": sdrf-pipelines: \$(echo "0.0.22") From 9e4c872304f29e16fbc65a99a436ef4670adebef Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 5 Aug 2023 19:11:55 -0700 Subject: [PATCH 012/113] fixed bug where mzml AND raw files were passed --- subworkflows/local/file_preparation.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf index ca660c1b..46de43da 100644 --- a/subworkflows/local/file_preparation.nf +++ b/subworkflows/local/file_preparation.nf @@ -65,9 +65,8 @@ workflow FILE_PREPARATION { TDF2MZML( ch_branched_input.dotD ) ch_versions = ch_versions.mix(TDF2MZML.out.version) - indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted) ch_results = indexed_mzml_bundle.mix(TDF2MZML.out.dotd_files) - // todo... evaluate if the .mzml is used explicitly anywhere else downstream + indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted) MZMLSTATISTICS( indexed_mzml_bundle ) ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.mzml_statistics.collect()) From 1a8639339e6603d79844425e3960efe72109b036 Mon Sep 17 00:00:00 2001 From: "J. 
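PATCH 011's DiannDirectory locates each DIA-NN output by suffix with a recursive pathlib glob. A self-contained sketch of that lookup pattern, using a hypothetical results directory (the paths are illustrative, not the pipeline's actual layout):

    from pathlib import Path

    def find_suffix_file(base_path: Path, suffix: str) -> Path:
        # Lazily walk base_path and return the first file whose name ends with
        # the suffix, mirroring base_path.glob(f"**/*{suffix}") in the patch above.
        try:
            return next(base_path.glob(f"**/*{suffix}"))
        except StopIteration:
            raise FileNotFoundError(f"Could not find file with suffix {suffix}")

    # e.g. find_suffix_file(Path("results"), "report.tsv") would resolve
    # results/diann_report.tsv, if such a file exists.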
Sebastian Paez" Date: Sat, 5 Aug 2023 19:22:13 -0700 Subject: [PATCH 013/113] added speclib to schema --- nextflow.config | 3 ++- nextflow_schema.json | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9d34cbdc..a1b4d0a8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -157,7 +157,8 @@ params { mass_acc_automatic = true pg_level = 2 species_genes = false - diann_normalize = true + diann_normalize = true + diann_speclib = '' // MSstats general options msstats_remove_one_feat_prot = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 8470e3d0..5a900c52 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -891,6 +891,13 @@ "fa_icon": "far fa-check-square", "default": false }, + "diann_speclib": { + "type": "string", + "description": "The spectral library to use for DIA-NN", + "fa_icon": "fas fa-file", + "help_text": "If passed, will use that spectral library to carry out the DIA-NN search, instead of predicting one from the fasta file.", + "hidden": false + }, "diann_debug": { "type": "integer", "description": "Debug level", From 92ee45260efac0020755fd2098c68d24f9277760 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 5 Aug 2023 20:48:55 -0700 Subject: [PATCH 014/113] returned report in abstracted diannconvert --- bin/diann_convert.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index bb3d8963..3657f8cb 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -289,6 +289,7 @@ def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: precursor_list = list(report["Precursor.Id"].unique()) report["precursor.Index"] = report.apply(lambda x: precursor_list.index(x["Precursor.Id"]), axis=1) + return report def query_expdesign_value(reference, f_table, s_table): From 33b35793b0dcfbc6d7818ba4667e8bf771ea0a1a Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 03:26:17 -0700 Subject: [PATCH 015/113] refactor and speedup of diann summary --- bin/diann_convert.py | 473 +++++++++++++++++++---------- modules/local/diannconvert/main.nf | 1 + modules/local/tdf2mzml/main.nf | 34 +-- 3 files changed, 317 insertions(+), 191 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 3657f8cb..df867452 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -11,7 +11,8 @@ import re from dataclasses import dataclass from pathlib import Path -from typing import Any, List +from typing import Any, List, Tuple, Dict +from functools import lru_cache import click import numpy as np @@ -24,6 +25,9 @@ CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) +logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.DEBUG) +logger = logging.getLogger(__name__) + @click.group(context_settings=CONTEXT_SETTINGS) def cli(): @@ -32,13 +36,14 @@ def cli(): @click.command("convert") @click.option("--folder", "-f") +@click.option("--exp_design", "-d") @click.option("--diann_version", "-v") @click.option("--dia_params", "-p") @click.option("--charge", "-c") @click.option("--missed_cleavages", "-m") @click.option("--qvalue_threshold", "-q", type=float) @click.pass_context -def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qvalue_threshold): +def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_cleavages, qvalue_threshold): """ Convert DIA-NN output to MSstats, Triqler or mzTab. 
The output formats are @@ -61,7 +66,7 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv """ diann_directory = DiannDirectory(folder, diann_version_file=diann_version) report = diann_directory.main_report_df(qvalue_threshold=qvalue_threshold) - s_DataFrame, f_table = diann_directory.exp_design_dfs() + s_DataFrame, f_table = get_exp_design_dfs(exp_design) # Convert to MSstats msstats_columns_keep = [ @@ -85,24 +90,28 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv out_msstats["Reference"] = out_msstats.apply(lambda x: os.path.basename(x["Reference"]), axis=1) # TODO remove this if not debugging - print("\n\nReference Column >>>") - print(out_msstats["Reference"]) + logger.debug("\n\nReference Column >>>") + logger.debug(out_msstats["Reference"]) - print("\n\nout_msstats >>>") - print(out_msstats.head(5)) + logger.debug(f"\n\nout_msstats ({out_msstats.shape}) >>>") + logger.debug(out_msstats.head(5)) - print("\n\nf_table >>>") - print(f_table.head(5)) + logger.debug(f"\n\nf_table ({f_table.shape})>>>") + logger.debug(f_table.head(5)) - print("\n\ns_DataFrame >>>") - print(s_DataFrame.head(5)) + logger.debug(f"\n\ns_DataFrame ({s_DataFrame.shape})>>>") + logger.debug(s_DataFrame.head(5)) ## END TODO + logger.debug("Adding Fraction, BioReplicate, Condition columns") + design_looker = ExpDesignLooker(f_table=f_table, s_table=s_DataFrame) out_msstats[["Fraction", "BioReplicate", "Condition"]] = out_msstats.apply( - lambda x: query_expdesign_value(x["Run"], f_table, s_DataFrame), axis=1, result_type="expand" + lambda x: design_looker.query_expdesign_value(x["Run"]), axis=1, result_type="expand" ) - exp_out_prefix = str(Path(diann_directory.exp_design).stem) + del design_looker + exp_out_prefix = str(Path(exp_design).stem) out_msstats.to_csv(exp_out_prefix + "_msstats_in.csv", sep=",", index=False) + logger.info(f"MSstats input file is saved as {exp_out_prefix}_msstats_in.csv") # Convert to Triqler trinqler_cols = ["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Run", "Condition"] @@ -114,14 +123,66 @@ def convert(ctx, folder, dia_params, diann_version, charge, missed_cleavages, qv out_triqler.loc[:, "searchScore"] = report["Q.Value"] out_triqler.loc[:, "searchScore"] = 1 - out_triqler["searchScore"] out_triqler.to_csv(exp_out_prefix + "_triqler_in.tsv", sep="\t", index=False) + logger.info(f"Triqler input file is saved as {exp_out_prefix}_triqler_in.tsv") del out_triqler + mztab_out = f"{str(Path(exp_design).stem)}_out.mzTab" # Convert to mzTab diann_directory.convert_to_mztab( - report=report, f_table=f_table, charge=charge, missed_cleavages=missed_cleavages, dia_params=dia_params + report=report, + f_table=f_table, + charge=charge, + missed_cleavages=missed_cleavages, + dia_params=dia_params, + out=mztab_out, ) +def _true_stem(x): + """ + Return the true stem of a file name, i.e. the + file name without the extension. + + :param x: The file name + :type x: str + :return: The true stem of the file name + :rtype: str + + Examples: + >>> _true_stem("foo.mzML") + 'foo' + >>> _true_stem("foo.d.tar") + 'foo' + + These examples can be tested with pytest: + $ pytest -v --doctest-modules + """ + split = os.path.basename(x).split(".") + stem = split[0] + + # Should I check here that the extensions are + # allowed? I can see how this would break if the + # file name contains a period. 
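# (Editorial sketch, not part of the patch.) The get_exp_design_dfs helper that
# follows splits the OpenMS-style experimental design into its two stacked,
# tab-separated tables; assuming a minimal made-up design, the layout is:
#
#     Fraction_Group   Fraction   Spectra_Filepath   Label   Sample
#     1                1          sample_a.mzML      1       1
#     <blank line>
#     Sample   MSstats_Condition   MSstats_BioReplicate
#     1        control             1
#
# The blank line is what `data.index("\n")` finds: rows above it become the
# file-level f_table, rows below it the sample-level s_DataFrame.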
+
+
+def get_exp_design_dfs(exp_design_file):
+    logger.info(f"Reading experimental design file: {exp_design_file}")
+    with open(exp_design_file, "r") as f:
+        data = f.readlines()
+        empty_row = data.index("\n")
+        f_table = [i.replace("\n", "").split("\t") for i in data[1:empty_row]]
+        f_header = data[0].replace("\n", "").split("\t")
+        f_table = pd.DataFrame(f_table, columns=f_header)
+        f_table.loc[:, "run"] = f_table.apply(lambda x: _true_stem(x["Spectra_Filepath"]), axis=1)
+
+        s_table = [i.replace("\n", "").split("\t") for i in data[empty_row + 1 :]][1:]
+        s_header = data[empty_row + 1].replace("\n", "").split("\t")
+        s_DataFrame = pd.DataFrame(s_table, columns=s_header)
+
+    return s_DataFrame, f_table


 @dataclass
 class DiannDirectory:
     base_path: os.PathLike
     diann_version_file: str

     def find_suffix_file(self, suffix: str, only_first=True) -> os.PathLike:
+        """Finds a file with a given suffix in the directory.
+
+        :param suffix: The suffix to search for
+        :type suffix: str
+        :param only_first: Whether to return only the first file found, if false returns all, defaults to True
+        :type only_first: bool, optional
+
+        :raises FileNotFoundError: If no file with the given suffix is found
+        """
         matching = self.base_path.glob(f"**/*{suffix}")

     @property
     def mzml_info(self) -> os.PathLike:
         return self.find_suffix_file("mzml_info.tsv")

     @property
-    def diann_version(self) -> str:
+    def validate_diann_version(self) -> str:
+        logger.debug("Validating DIANN version")
+        diann_version_id = None
         with open(self.diann_version_file) as f:
             for line in f:
                 if "DIA-NN" in line:
+                    logger.debug(f"Found DIA-NN version: {line}")
                     diann_version_id = line.rstrip("\n").split(": ")[1]
-                    return diann_version_id

+        if diann_version_id is None:
+            raise ValueError(f"Could not find DIA-NN version in file {self.diann_version_file}")
+        elif diann_version_id == "1.8.1":
+            return diann_version_id
+        else:
+            # Maybe this error should be detected beforehand to save time ...
+            raise ValueError(f"Unsupported DIANN version {self.diann_version}")
+
+    def convert_to_mztab(
+        self, report, f_table, charge: int, missed_cleavages: int, dia_params: List[Any], out: os.PathLike
+    ) -> None:
+        logger.info("Converting to mzTab")
         # Convert to mzTab
+        self.validate_diann_version
+
+        # This could be a branching point if we want to support other versions
+        # of DIA-NN, maybe something like this:
+        # if diann_version_id == "1.8.1":
+        #     self.convert_to_mztab_1_8_1(report, f_table, charge, missed_cleavages, dia_params)
+        # else:
+        #     raise ValueError(f"Unsupported DIANN version {diann_version_id}, supported versions are 1.8.1 ...")
+
+        logger.info(f"Reading fasta file: {self.fasta}")
+        entries = []
+        f = FASTAFile()
+        f.load(str(self.fasta), entries)
+        fasta_entries = [(e.identifier, e.sequence, len(e.sequence)) for e in entries]
+        fasta_df = pd.DataFrame(fasta_entries, columns=["id", "seq", "len"])
+
+        index_ref = f_table
+        index_ref.loc[:, "ms_run"] = index_ref.apply(lambda x: x["Fraction_Group"], axis=1)
+        index_ref.loc[:, "study_variable"] = index_ref.apply(lambda x: x["Sample"], axis=1)
+        index_ref.loc[:, "ms_run"] = index_ref.loc[:, "ms_run"].astype("int")
+        index_ref.loc[:, "study_variable"] = index_ref.loc[:, "study_variable"].astype("int")
+        report[["ms_run", "study_variable"]] = report.apply(
+            lambda x: add_info(x["Run"], index_ref), axis=1, result_type="expand"
+        )
+
+        (MTD, database) = mztab_MTD(index_ref, dia_params, str(self.fasta), charge, missed_cleavages)
+        pg = pd.read_csv(
+            self.pg_matrix,
+            sep="\t",
+            header=0,
+        )
+        PRH = mztab_PRH(report, pg, index_ref, database, fasta_df)
+        del pg
+        pr = pd.read_csv(
+            self.pr_matrix,
+            sep="\t",
+            header=0,
+        )
+        precursor_list = list(report["Precursor.Id"].unique())
+        PEH = mztab_PEH(report, pr, precursor_list, index_ref, database)
+        del pr
+        PSH = mztab_PSH(report, str(self.base_path), database)
+        del report
+        MTD.loc["", :] = ""
+        PRH.loc[len(PRH) + 1, :] = ""
+        PEH.loc[len(PEH) + 1, :] = ""
+        with open(out, "w", newline="") as f:
+            MTD.to_csv(f, mode="w", sep="\t", index=False, header=False)
+            PRH.to_csv(f, mode="w", sep="\t", index=False, header=True)
+            PEH.to_csv(f, mode="w", sep="\t", index=False, header=True)
+            PSH.to_csv(f, mode="w", sep="\t", index=False, header=True)
+
+        logger.info(f"mzTab file generated successfully! at {out}_out.mzTab")

     def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame:
         remain_cols = [
             "File.Name",
             "Run",
             "Protein.Group",
             "Protein.Names",
             "Protein.Ids",
             "First.Protein.Description",
             "PG.MaxLFQ",
             "RT.Start",
             "Global.Q.Value",
             "Lib.Q.Value",
             "PEP",
             "Precursor.Normalised",
             "Precursor.Id",
             "Q.Value",
             "Modified.Sequence",
             "Stripped.Sequence",
             "Precursor.Charge",
             "Precursor.Quantity",
             "Global.PG.Q.Value",
         ]
         report = pd.read_csv(self.report, sep="\t", header=0, usecols=remain_cols)

         # filter based on qvalue parameter for downstream analysis
         report = report[report["Q.Value"] < qvalue_threshold]
         report["Calculate.Precursor.Mz"] = report.apply(
             lambda x: calculate_mz(x["Stripped.Sequence"], x["Precursor.Charge"]), axis=1
         )

         precursor_list = list(report["Precursor.Id"].unique())
         report["precursor.Index"] = report.apply(lambda x: precursor_list.index(x["Precursor.Id"]), axis=1)
         return report


+@dataclass
+class ExpDesignLooker:
+    """Caches the lookup of values in the experimental design table."""
+
+    f_table: pd.DataFrame
+    s_table: pd.DataFrame
+
+    def __hash__(self):
+        # This is not a perfect hash function but it will work
+        # for our use case, since we are not going to change
+        # the content of f_table and s_table
+
+        # I am using this over a strict hash for performance reasons
+        # since the hash is calculated every time a method with cache
+        # is called.
+        hash_v = hash(self.f_table.values.shape) + hash(self.s_table.values.shape)
+        return hash_v
+
+    @lru_cache(maxsize=128)
+    def query_expdesign_value(self, reference):
+        """
+        By matching the "Run" column in f_table or the "Sample" column in s_table, this function
+        returns a tuple containing Fraction, BioReplicate and Condition.
+
+        :param reference: The value of "Run" column in out_msstats
+        :type reference: str
+        :param f_table: A table contains experiment settings(search engine settings etc.)
+        :type f_table: pandas.core.frame.DataFrame
+        :param s_table: A table contains experimental design
+        :type s_table: pandas.core.frame.DataFrame
+        :return: A tuple contains Fraction, BioReplicate and Condition
+        :rtype: tuple
+        """
+        f_table = self.f_table
+        s_table = self.s_table
+        if reference not in f_table["run"].values:
+            raise ValueError(f"Reference {reference} not found in f_table;" f" values are {set(f_table['run'].values)}")
+
+        query_reference = f_table[f_table["run"] == reference]
+        Fraction = query_reference["Fraction"].values[0]
+        row = s_table[s_table["Sample"] == query_reference["Sample"].values[0]]
+        BioReplicate = row["MSstats_BioReplicate"].values[0]
+        Condition = row["MSstats_Condition"].values[0]
+
+        return Fraction, BioReplicate, Condition

     :return: MTD sub-table
     :rtype: pandas.core.frame.DataFrame
     """
+    logger.info("Constructing MTD sub-table...")
     dia_params_list = dia_params.split(";")
     dia_params_list = ["null" if i == "" else i for i in dia_params_list]
     FragmentMassTolerance = dia_params_list[0]

     :return: PRH sub-table
     :rtype: pandas.core.frame.DataFrame
     """
+    logger.info("Constructing PRH sub-table...")
     file = list(pg.columns[5:])
     col = {}
     for i in file:
         col[i] = (
-            "protein_abundance_assay["
-            + str(index_ref[index_ref["run"] == os.path.splitext(os.path.split(i)[1])[0]]["ms_run"].values[0])
-            + "]"
+            "protein_abundance_assay[" + str(index_ref[index_ref["run"] == _true_stem(i)]["ms_run"].values[0]) + "]"
         )

     pg.rename(columns=col, inplace=True)
+
+    logger.debug("Classifying results type ...")
     pg.loc[:, "opt_global_result_type"] = pg.apply(classify_result_type, axis=1, result_type="expand")

     out_mztab_PRH = pd.DataFrame()

     ]
     for i in null_col:
         out_mztab_PRH.loc[:, i] = "null"
+
+    logger.debug("Extracting accession values (keeping first)...")
     out_mztab_PRH.loc[:, "accession"] = out_mztab_PRH.apply(lambda x: x["accession"].split(";")[0], axis=1)

     protein_details_df = out_mztab_PRH[out_mztab_PRH["opt_global_result_type"] == "indistinguishable_protein_group"]

     protein_details_df = (
         protein_details_df.drop("accession", axis=1).join(prh_series).reset_index().drop(columns="index")
     )
+    # Q: how is the next line different from `df.loc[:, "col"] = 'protein_details'` ??
     protein_details_df.loc[:, "opt_global_result_type"] = protein_details_df.apply(lambda x: "protein_details", axis=1)
     # protein_details_df = protein_details_df[-protein_details_df["accession"].str.contains("-")]
     out_mztab_PRH = pd.concat([out_mztab_PRH, protein_details_df]).reset_index(drop=True)

+    logger.debug("Calculating protein coverage...")
+    # This is a bottleneck
     out_mztab_PRH.loc[:, "protein_coverage"] = out_mztab_PRH.apply(
         lambda x: calculate_protein_coverage(report, x["accession"], x["Protein.Ids"], fasta_df),
         axis=1,
         result_type="expand",
     )

+    logger.debug("Getting ambiguity members...")
     out_mztab_PRH.loc[:, "ambiguity_members"] = out_mztab_PRH.apply(
         lambda x: x["Protein.Ids"] if x["opt_global_result_type"] == "indistinguishable_protein_group" else "null",
         axis=1,
     )

+    logger.debug("Matching PRH to best search engine score...")
+    score_looker = ModScoreLooker(report)
     out_mztab_PRH[["modifiedSequence", "best_search_engine_score[1]"]] = out_mztab_PRH.apply(
-        lambda x: PRH_match_report(report, x["accession"]), axis=1, result_type="expand"
+        lambda x: score_looker.get_score(x["accession"]), axis=1, result_type="expand"
     )

+    logger.debug("Matching PRH to modifications...")
     out_mztab_PRH.loc[:, "modifications"] = out_mztab_PRH.apply(
         lambda x: find_modification(x["modifiedSequence"]), axis=1, result_type="expand"
     )

         col for col in out_mztab_PRH.columns if col.startswith("opt_")
     ]
     out_mztab_PRH = out_mztab_PRH[new_cols]
-
-    # out_mztab_PRH.to_csv("./out_protein.mztab", sep=",", index=False)
-
     return out_mztab_PRH

     :return: PEH sub-table
     :rtype: pandas.core.frame.DataFrame
     """
+    logger.info("Constructing PEH sub-table...")
     out_mztab_PEH = pd.DataFrame()
     out_mztab_PEH = pr.iloc[:, 0:10]

         inplace=True,
     )

+    logger.debug("Finding modifications...")
     out_mztab_PEH.loc[:, "modifications"] = out_mztab_PEH.apply(
         lambda x: find_modification(x["opt_global_cv_MS:1000889_peptidoform_sequence"]), axis=1, result_type="expand"
     )

+    logger.debug("Extracting sequence...")
     out_mztab_PEH.loc[:, "opt_global_cv_MS:1000889_peptidoform_sequence"] = out_mztab_PEH.apply(
         lambda x: AASequence.fromString(x["opt_global_cv_MS:1000889_peptidoform_sequence"]).toString(), axis=1
     )

+    logger.debug("Checking accession uniqueness...")
     out_mztab_PEH.loc[:, "unique"] = out_mztab_PEH.apply(
         lambda x: "0" if ";" in str(x["accession"]) else "1", axis=1, result_type="expand"
     )

     for i in null_col:
         out_mztab_PEH.loc[:, i] = "null"
     out_mztab_PEH.loc[:, "opt_global_cv_MS:1002217_decoy_peptide"] = "0"

+    logger.debug("Matching precursor IDs...")
     ## average value of each study_variable
     ## quantity at peptide level: Precursor.Normalised
     out_mztab_PEH.loc[:, "pr_id"] = out_mztab_PEH.apply(
         lambda x: precursor_list.index(x["Precursor.Id"]), axis=1, result_type="expand"
     )
+    logger.debug("Done Matching precursor IDs...")

     max_assay = max(index_ref["ms_run"])
     max_study_variable = max(index_ref["study_variable"])
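# (Editorial sketch, not part of the patch.) The PSH builder in the next hunk
# iterates report.groupby(["Run"]); with a list key, newer pandas yields
# one-element tuples as group keys, which is why the code below unwraps them.
# A self-contained example of that normalization:
import pandas as pd

df = pd.DataFrame({"Run": ["a", "a", "b"], "x": [1, 2, 3]})
for n, group in df.groupby(["Run"]):
    if isinstance(n, tuple) and len(n) == 1:
        n = n[0]  # ("a",) -> "a"
    print(n, len(group))  # prints: a 2, then b 1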
to + # "myfolder/220101_myfile_mzml_info.tsv" + files = list(Path(dir).glob(f"*{n}*_info.tsv")) + # Check that it matches one and only one file + if not files: + raise ValueError(f"Could not find {n} info file in {dir}") + if len(files) > 1: + raise ValueError(f"Found multiple {n} info files in {dir}: {files}") + + return files[0] + out_mztab_PSH = pd.DataFrame() for n, group in report.groupby(["Run"]): - file = folder + n + "_mzml_info.tsv" + if isinstance(n, tuple) and len(n) == 1: + # This is here only to support versions of pandas where the groupby + # key is a tuple. + # related: https://github.com/pandas-dev/pandas/pull/51817 + n = n[0] + + file = __find_info(folder, n) target = pd.read_csv(file, sep="\t") group.sort_values(by="RT.Start", inplace=True) target = target[["Retention_Time", "SpectrumID", "Exp_Mass_To_Charge"]] @@ -774,6 +880,7 @@ def mztab_PSH(report, folder, database): for i in null_col: out_mztab_PSH.loc[:, i] = "null" + logger.info("Finding Modifications ...") out_mztab_PSH.loc[:, "modifications"] = out_mztab_PSH.apply( lambda x: find_modification(x["opt_global_cv_MS:1000889_peptidoform_sequence"]), axis=1, result_type="expand" ) @@ -863,16 +970,16 @@ def findstr(basestr, s, resultlist): resultlist = findstr(ref, i, resultlist) # Sort and merge the interval list resultlist.sort() - l, r = 0, 1 - while r < len(resultlist): - x1, y1 = resultlist[l][0], resultlist[l][1] - x2, y2 = resultlist[r][0], resultlist[r][1] + left, right = 0, 1 + while right < len(resultlist): + x1, y1 = resultlist[left][0], resultlist[left][1] + x2, y2 = resultlist[right][0], resultlist[right][1] if x2 > y1: - l += 1 - r += 1 + left += 1 + right += 1 else: - resultlist[l] = [x1, max(y1, y2)] - resultlist.pop(r) + resultlist[left] = [x1, max(y1, y2)] + resultlist.pop(right) coverage_length = np.array([i[1] - i[0] + 1 for i in resultlist]).sum() protein_coverage = format(coverage_length / len(ref), ".3f") @@ -897,7 +1004,7 @@ def match_in_report(report, target, max_, flag, level): :type level: str :return: A tuple contains multiple messages :rtype: tuple - """ + """ # noqa if flag == 1 and level == "pep": result = report[report["precursor.Index"] == target] PEH_params = [] @@ -926,23 +1033,60 @@ def match_in_report(report, target, max_, flag, level): return tuple(PRH_params) -def PRH_match_report(report, target): +class ModScoreLooker: """ - Returns a tuple contains modified sequences and the score at protein level. + Class used to cache the lookup table of accessions to best scores and their + respective mod sequences. + + Pre-computing the lookup table leverages a lot of speedum and vectortized + operations from pandas, and is much faster than doing the lookup on the fly + in a loop. 
     :param report: Dataframe for Dia-NN main report
     :type report: pandas.core.frame.DataFrame
-    :param target: The value of "accession" column in report
-    :type target: str
-    :return: A tuple contains multiple information to construct PRH sub-table
-    :rtype: tuple
     """
-    match = report[report["Protein.Ids"] == target]
-    modSeq = match["Modified.Sequence"].values[0] if match["Modified.Sequence"].values.size > 0 else np.nan
-    ## Score at protein level: Global.PG.Q.Value (without MBR)
-    score = match["Global.PG.Q.Value"].min()
-
-    return modSeq, score
+    def __init__(self, report: pd.DataFrame) -> None:
+        self.lookup_dict = self.make_lookup_dict(report)
+
+    def make_lookup_dict(self, report) -> Dict[str, Tuple[str, float]]:
+        grouped_df = (
+            report[["Modified.Sequence", "Protein.Ids", "Global.PG.Q.Value"]]
+            .sort_values("Global.PG.Q.Value", ascending=True)
+            .groupby(["Protein.Ids"])
+            .head(1)
+        )
+        #        Modified.Sequence               Protein.Ids  Global.PG.Q.Value
+        # 78265         LFNEQNFFQR  Q8IV63;Q8IV63-2;Q8IV63-3           0.000252
+        # 103585   NPTIVNFPITNVDLR           Q53GS9;Q53GS9-2           0.000252
+        # 103586         NPTWKPLIR           Q7Z4Q2;Q7Z4Q2-2           0.000252
+        # 103588     NPVGYPLAWQFLR           Q9NZ08;Q9NZ08-2           0.000252
+
+        # Stored as (sequence, score): the caller unpacks the tuple into the
+        # modifiedSequence and best_search_engine_score[1] columns, in that order.
+        out = {
+            row["Protein.Ids"]: (row["Modified.Sequence"], row["Global.PG.Q.Value"]) for _, row in grouped_df.iterrows()
+        }
+        return out
+
+    def get_score(self, protein_id: str) -> float:
+        """Returns a tuple contains modified sequences and the score at protein level.
+
+        Gets the best score and corresponding peptide for a given protein_id
+
+        Note that protein id can be something like 'Q8IV63;Q8IV63-2;Q8IV63-3'
+
+        Note 2: This implementation also fixes a bug where the function would
+        return the first peptide in the report, not the best one (but with the
+        score of the best one for that accession).
+
+        :param protein_id: The value of "accession" column in report
+        :type protein_id: str
+        :return: A tuple that contains (best modified sequence, best score);
+           if the accession is not found, (np.nan, np.nan) is returned.
+        :rtype: tuple
+        """
+        # Q: in what cases can the accession not exist in the table?
+        # or an accession not have peptides?
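
For reference, the sort_values -> groupby -> head(1) idiom used in make_lookup_dict above
is what guarantees exactly one best-scoring row per accession group before the dict is
built. A toy illustration with invented values:

import pandas as pd

report = pd.DataFrame(
    {
        "Protein.Ids": ["P1;P1-2", "P1;P1-2", "P2"],
        "Global.PG.Q.Value": [0.010, 0.001, 0.050],
        "Modified.Sequence": ["PEPTIDEA", "PEPTIDEB", "PEPTIDEC"],
    }
)
best = report.sort_values("Global.PG.Q.Value").groupby("Protein.Ids").head(1)
# One row per group survives: P1;P1-2 keeps PEPTIDEB (q=0.001), P2 keeps PEPTIDEC.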
+ val = self.lookup_dict.get(protein_id, (np.nan, np.nan)) + return val def PEH_match_report(report, target): @@ -966,6 +1110,9 @@ def PEH_match_report(report, target): return search_score, time, q_score, spec_e, mz +# Pre-compiling the regex makes the next function 2x faster +# in myu benchmarking - JSPP +MODIFICATION_PATTERN = re.compile(r"\((.*?)\)") def find_modification(peptide): """ @@ -973,14 +1120,19 @@ def find_modification(peptide): :param peptide: Sequences of peptides :type peptide: str - :return: Modification sites + :return: Modification sites :rtype: str + + Examples: + >>> find_modification("PEPM(UNIMOD:35)IDE") + '4-UNIMOD:35' + >>> find_modification("SM(UNIMOD:35)EWEIRDS(UNIMOD:21)EPTIDEK") + '2-UNIMOD:35,9-UNIMOD:21' """ peptide = str(peptide) - pattern = re.compile(r"\((.*?)\)") - original_mods = pattern.findall(peptide) - peptide = re.sub(r"\(.*?\)", ".", peptide) - position = [i.start() for i in re.finditer(r"\.", peptide)] + original_mods = MODIFICATION_PATTERN.findall(peptide) + peptide = MODIFICATION_PATTERN.sub(".", peptide) + position = [i for i, x in enumerate(peptide) if x == "."] for j in range(1, len(position)): position[j] -= j @@ -988,7 +1140,7 @@ def find_modification(peptide): original_mods[k] = str(position[k]) + "-" + original_mods[k].upper() original_mods = ",".join(str(i) for i in original_mods) if len(original_mods) > 0 else "null" - + return original_mods @@ -1002,7 +1154,10 @@ def calculate_mz(seq, charge): :type charge: int :return: """ + # Q: is this faster if we make it a set? and maybe make it a global variable? ref = "ARNDBCEQZGHILKMFPSTWYV" + + # Q: Does this mean that all modified peptides will have a wrong m/z? seq = "".join([i for i in seq if i in ref]) if charge == "": return None diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf index beab3c96..ceab2bde 100644 --- a/modules/local/diannconvert/main.nf +++ b/modules/local/diannconvert/main.nf @@ -36,6 +36,7 @@ process DIANNCONVERT { """ diann_convert.py convert \\ --folder ./ \\ + --exp_design ${exp_design} \\ --diann_version ./version/versions.yml \\ --dia_params "${dia_params}" \\ --charge $params.max_precursor_charge \\ diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf index 5d5bb1f7..1a1686dd 100644 --- a/modules/local/tdf2mzml/main.nf +++ b/modules/local/tdf2mzml/main.nf @@ -1,46 +1,16 @@ -// process TDF2MZML { -// publishDir "${params.mzml_dir}/${outputDir}", pattern: "*.mzML.gz", failOnError: true -// container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use... -// label 'process_single' -// label 'error_retry' -// -// input: -// tuple val(file_id), path(tdf_input), val(outputDir) -// -// output: -// tuple val(file_id), path("${file(tdf_input.baseName).baseName}.mzML.gz") -// -// script: -// """ -// echo "Unpacking..." -// tar -xvf ${tdf_input} -// echo "Converting..." -// tdf2mzml.py -i *.d # --ms1_type "centroid" -// echo "Compressing..." -// mv *.mzml ${file(tdf_input.baseName).baseName}.mzML -// gzip ${file(tdf_input.baseName).baseName}.mzML -// """ -// -// stub: -// """ -// touch ${file(tdf_input.baseName).baseName}.mzML.gz -// """ -// } - - process TDF2MZML { tag "$meta.mzml_id" label 'process_low' label 'process_single' label 'error_retry' - // conda "conda-forge::mono bioconda::thermorawfileparser=1.3.4" + // for rawfileparser this is conda "conda-forge::mono bioconda::thermorawfileparser=1.3.4" // conda is not enabled for DIA so ... 
disabling anyway + // container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? // 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.3.4--ha8f3691_0' : // 'quay.io/biocontainers/thermorawfileparser:1.3.4--ha8f3691_0' }" - // TODO add support for singularity ... container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use... stageInMode { From 6ae3565056715efd9fcb7cbdd85043e6322f7dc7 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 04:06:16 -0700 Subject: [PATCH 016/113] added debug info to versions --- bin/diann_convert.py | 11 ++++++++++- .../templates/dumpsoftwareversions.py | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index df867452..711a9cb3 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -613,7 +613,7 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): # protein_details_df = protein_details_df[-protein_details_df["accession"].str.contains("-")] out_mztab_PRH = pd.concat([out_mztab_PRH, protein_details_df]).reset_index(drop=True) - logger.debug("Calculating protein coverage...") + logger.debug("Calculating protein coverage (bottleneck)...") # This is a bottleneck out_mztab_PRH.loc[:, "protein_coverage"] = out_mztab_PRH.apply( lambda x: calculate_protein_coverage(report, x["accession"], x["Protein.Ids"], fasta_df), @@ -639,6 +639,9 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): ) ## quantity at protein level: PG.MaxLFQ + logger.debug("Matching PRH to protein quantification (bottleneck)...") + # TODO optimize this section + # This is a second bottleneck max_study_variable = max(index_ref["study_variable"]) PRH_params = [] for i in range(1, max_study_variable + 1): @@ -655,6 +658,7 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): axis=1, result_type="expand", ) + # end TODO out_mztab_PRH.loc[:, "PRH"] = "PRT" index = out_mztab_PRH.loc[:, "PRH"] @@ -732,13 +736,17 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): max_assay = max(index_ref["ms_run"]) max_study_variable = max(index_ref["study_variable"]) + logger.debug("Getting scores per run (bottleneck)") ms_run_score = [] for i in range(1, max_assay + 1): ms_run_score.append("search_engine_score[1]_ms_run[" + str(i) + "]") + out_mztab_PEH[ms_run_score] = out_mztab_PEH.apply( lambda x: match_in_report(report, x["pr_id"], max_assay, 0, "pep"), axis=1, result_type="expand" ) + logger.debug("Getting peptide abundances per study variable (bottleneck)") + # TODO optimize this PEH_params = [] for i in range(1, max_study_variable + 1): PEH_params.extend( @@ -754,6 +762,7 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): lambda x: match_in_report(report, x["pr_id"], max_study_variable, 1, "pep"), axis=1, result_type="expand" ) + logger.debug("Getting peptide properties") out_mztab_PEH[ [ "best_search_engine_score[1]", diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index b02fa23c..f5690c52 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -58,6 +58,11 @@ def main(): "yaml": yaml.__version__, } + with open("$versions") as f: + # load as text and print for debugging + versions_text = f.read() + print(versions_text) + with open("$versions") as f: 
versions_by_process = yaml.safe_load(f) | versions_this_module From 4b58fbadc917345d0de9dfd32049667c69962cab Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 09:39:07 -0700 Subject: [PATCH 017/113] moved tar version in the workflow from tracking to logging --- modules/local/tdf2mzml/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf index 1a1686dd..9932874d 100644 --- a/modules/local/tdf2mzml/main.nf +++ b/modules/local/tdf2mzml/main.nf @@ -45,6 +45,7 @@ process TDF2MZML { def prefix = task.ext.prefix ?: "${meta.mzml_id}" """ + \$(tar --version) echo "Unpacking..." | tee --append ${rawfile.baseName}_conversion.log tar -xvf ${rawfile} 2>&1 | tee --append ${rawfile.baseName}_conversion.log echo "Converting..." | tee --append ${rawfile.baseName}_conversion.log @@ -56,7 +57,6 @@ process TDF2MZML { cat <<-END_VERSIONS > versions.yml "${task.process}": - tar: \$(tar --version) tdf2mzml.py: \$(tdf2mzml.py --version) END_VERSIONS """ From 288415af18a9c424f2501b7f585a386743e5ff24 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 09:49:50 -0700 Subject: [PATCH 018/113] fixed dumb error --- modules/local/tdf2mzml/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf index 9932874d..56e173ad 100644 --- a/modules/local/tdf2mzml/main.nf +++ b/modules/local/tdf2mzml/main.nf @@ -45,7 +45,7 @@ process TDF2MZML { def prefix = task.ext.prefix ?: "${meta.mzml_id}" """ - \$(tar --version) + tar --version echo "Unpacking..." | tee --append ${rawfile.baseName}_conversion.log tar -xvf ${rawfile} 2>&1 | tee --append ${rawfile.baseName}_conversion.log echo "Converting..." | tee --append ${rawfile.baseName}_conversion.log From ffff4c9fec7e8f3b6725631c8da50ea643027bf1 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 14:37:25 -0700 Subject: [PATCH 019/113] experimental change of the experimental design to make multiqc pass --- modules/local/pmultiqc/main.nf | 6 ++++++ modules/local/sdrfparsing/main.nf | 13 ++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 0cbf679f..c3da9688 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -25,6 +25,12 @@ process PMULTIQC { def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" """ + # TODO remove the next line, it is only for debugging + tree results + + # Current hack to attempt matching file stems and not file extensions + # sed -i -e "s/((.d.tar)|(.d)|(.mzML)|(.mzml))\\t/\\t/g" + multiqc \\ -f \\ --config ./results/multiqc_config.yml \\ diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 6803f3a4..aec416aa 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -26,12 +26,23 @@ process SDRFPARSING { ## JSPP 2023-Aug -- Adding --raw for now, this will allow the development of the # bypass diann pipelie but break every other aspect of it. Make sure - # this flag is gone when PRing + # this flag is gone when PRing. + # Context, without --raw, all file name extenssions are changed to mzML. 
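
One caveat on the sed-based suffix stripping in the hunk below: grouping and alternation
are extended-regex features, so a pattern like ((.tar)|(.tar.gz)) only behaves as intended
with sed -E (and the unescaped dots match any character). A hedged pandas equivalent of the
same cleanup, assuming the OpenMS design file and its Spectra_Filepath column:

import pandas as pd

design = pd.read_csv("experimental_design.tsv", sep="\t")
# Strip a trailing .tar or .tar.gz archive suffix; anchored, dots escaped.
design["Spectra_Filepath"] = design["Spectra_Filepath"].str.replace(
    r"\.tar(\.gz)?$", "", regex=True
)
design.to_csv("experimental_design.tsv", sep="\t", index=False)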
+ # related: https://github.com/bigbio/sdrf-pipelines/issues/145 parse_sdrf convert-openms --raw -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log mv openms.tsv ${sdrf.baseName}_config.tsv mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv + # Adding here the removal of the .tar, since DIANN takes the .d directly + # all logs from the tool match only the .d suffix. so it is easier to + # remove it here than doing the forensic tracking back of the file. + sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_openms_design.tsv + + # I am almost sure these do need to be as they exist in the file system + # before execution. + # sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_config.tsv + ## TODO Update the sdrf-pipelines to dynamic print versions # Version reporting can now be programmatic, since: # https://github.com/bigbio/sdrf-pipelines/pull/134 From d9feb35264cdd12385b042af23da15fe04042020 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 15:50:13 -0700 Subject: [PATCH 020/113] changed debug listing of contents in multiqc from tree to ls --- modules/local/pmultiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index c3da9688..5c5d6464 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -26,7 +26,7 @@ process PMULTIQC { """ # TODO remove the next line, it is only for debugging - tree results + ls -lcth * # Current hack to attempt matching file stems and not file extensions # sed -i -e "s/((.d.tar)|(.d)|(.mzML)|(.mzml))\\t/\\t/g" From 5e8fa6681ea568cc6a4b26a7a0af4d75fe5afdfe Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 6 Aug 2023 17:33:14 -0700 Subject: [PATCH 021/113] stuff --- modules/local/pmultiqc/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 5c5d6464..a17b9281 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -30,6 +30,7 @@ process PMULTIQC { # Current hack to attempt matching file stems and not file extensions # sed -i -e "s/((.d.tar)|(.d)|(.mzML)|(.mzml))\\t/\\t/g" + sed -i -e "s/.tar\\t/\\t/g" results/*openms_design.tsv multiqc \\ -f \\ From bf084e2116db23b85b90fffc1c724c43e6614ed0 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 8 Aug 2023 18:26:16 -0700 Subject: [PATCH 022/113] major speedup --- bin/diann_convert.py | 216 +++++++++++++++++++++++++++++++++---------- 1 file changed, 166 insertions(+), 50 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 711a9cb3..df309fac 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -281,14 +281,12 @@ def convert_to_mztab( fasta_entries = [(e.identifier, e.sequence, len(e.sequence)) for e in entries] fasta_df = pd.DataFrame(fasta_entries, columns=["id", "seq", "len"]) - index_ref = f_table - index_ref.loc[:, "ms_run"] = index_ref.apply(lambda x: x["Fraction_Group"], axis=1) - index_ref.loc[:, "study_variable"] = index_ref.apply(lambda x: x["Sample"], axis=1) - index_ref.loc[:, "ms_run"] = index_ref.loc[:, "ms_run"].astype("int") - index_ref.loc[:, "study_variable"] = index_ref.loc[:, "study_variable"].astype("int") - report[["ms_run", "study_variable"]] = report.apply( - lambda x: add_info(x["Run"], index_ref), axis=1, result_type="expand" - ) + logger.info("Mapping run information to report") + index_ref = f_table.copy() + index_ref.rename(columns={"Fraction_Group": "ms_run", "Sample": "study_variable", "run": "Run"}, inplace=True) + index_ref["ms_run"] = index_ref["ms_run"].astype("int") + index_ref["study_variable"] = index_ref["study_variable"].astype("int") + report = report.merge(index_ref[["ms_run", "Run", "study_variable"]], on="Run", validate="many_to_one") (MTD, database) = mztab_MTD(index_ref, dia_params, str(self.fasta), charge, missed_cleavages) pg = pd.read_csv( @@ -572,7 +570,7 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): col = {} for i in file: col[i] = ( - "protein_abundance_assay[" + str(index_ref[index_ref["run"] == _true_stem(i)]["ms_run"].values[0]) + "]" + "protein_abundance_assay[" + str(index_ref[index_ref["Run"] == _true_stem(i)]["ms_run"].values[0]) + "]" ) pg.rename(columns=col, inplace=True) @@ -638,27 +636,38 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): lambda x: find_modification(x["modifiedSequence"]), axis=1, result_type="expand" ) + logger.debug("Matching PRH to protein quantification...") ## quantity at protein level: PG.MaxLFQ - logger.debug("Matching PRH to protein quantification (bottleneck)...") - # TODO optimize this section - # This is a second bottleneck - max_study_variable = max(index_ref["study_variable"]) - PRH_params = [] - for i in range(1, max_study_variable + 1): - PRH_params.extend( - [ - "protein_abundance_study_variable[" + str(i) + "]", - "protein_abundance_stdev_study_variable[" + str(i) + "]", - "protein_abundance_std_error_study_variable[" + str(i) + "]", - ] - ) - - out_mztab_PRH[PRH_params] = out_mztab_PRH.apply( - lambda x: match_in_report(report, x["accession"], max_study_variable, 1, "protein"), - axis=1, - result_type="expand", + # This used to be a bottleneck in performance + # This implementation drops the run time from 57s to 25ms + protein_agg_report = ( + report[["PG.MaxLFQ", "Protein.Ids", "study_variable"]] + .groupby(["study_variable", "Protein.Ids"]) + .agg({"PG.MaxLFQ": ["mean", "std", "sem"]}) + .reset_index() + .pivot(columns=["study_variable"], index="Protein.Ids") + .reset_index() ) - # end TODO + protein_agg_report.columns = ["::".join([str(s) for s in col]).strip() for col in protein_agg_report.columns.values] + subname_mapper = { + "Protein.Ids::::": "Protein.Ids", + "PG.MaxLFQ::mean": "protein_abundance_study_variable", + "PG.MaxLFQ::std": "protein_abundance_stdev_study_variable", + "PG.MaxLFQ::sem": 
"protein_abundance_std_error_study_variable", + } + name_mapper = name_mapper_builder(subname_mapper) + protein_agg_report.rename(columns=name_mapper, inplace=True) + # out_mztab_PRH has columns accession and Protein.Ids; 'Q9NZJ9', 'A0A024RBG1;Q9NZJ9;Q9NZJ9-2'] + # the report table has 'Protein.Group' and 'Protein.Ids': 'Q9NZJ9', 'A0A024RBG1;Q9NZJ9;Q9NZJ9-2' + # Oddly enough the last implementation mapped the the accession (Q9NZJ9) in the mztab + # to the Protein.Ids (A0A024RBG1;Q9NZJ9;Q9NZJ9-2), leading to A LOT of missing values. + out_mztab_PRH = out_mztab_PRH.merge( + protein_agg_report, on="Protein.Ids", how="left", validate="many_to_one", copy=True + ) + del name_mapper + del subname_mapper + del protein_agg_report + # end of (former) bottleneck out_mztab_PRH.loc[:, "PRH"] = "PRT" index = out_mztab_PRH.loc[:, "PRH"] @@ -726,7 +735,7 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): out_mztab_PEH.loc[:, i] = "null" out_mztab_PEH.loc[:, "opt_global_cv_MS:1002217_decoy_peptide"] = "0" - logger.debug("Matching precursor IDs...") + logger.debug("Matching precursor IDs... (botleneck)") ## average value of each study_variable ## quantity at peptide level: Precursor.Normalised out_mztab_PEH.loc[:, "pr_id"] = out_mztab_PEH.apply( @@ -740,27 +749,15 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): ms_run_score = [] for i in range(1, max_assay + 1): ms_run_score.append("search_engine_score[1]_ms_run[" + str(i) + "]") - + out_mztab_PEH[ms_run_score] = out_mztab_PEH.apply( lambda x: match_in_report(report, x["pr_id"], max_assay, 0, "pep"), axis=1, result_type="expand" ) - logger.debug("Getting peptide abundances per study variable (bottleneck)") - # TODO optimize this - PEH_params = [] - for i in range(1, max_study_variable + 1): - PEH_params.extend( - [ - "peptide_abundance_study_variable[" + str(i) + "]", - "peptide_abundance_stdev_study_variable[" + str(i) + "]", - "peptide_abundance_std_error_study_variable[" + str(i) + "]", - "opt_global_mass_to_charge_study_variable[" + str(i) + "]", - "opt_global_retention_time_study_variable[" + str(i) + "]", - ] - ) - out_mztab_PEH[PEH_params] = out_mztab_PEH.apply( - lambda x: match_in_report(report, x["pr_id"], max_study_variable, 1, "pep"), axis=1, result_type="expand" - ) + logger.debug("Getting peptide abundances per study variable") + pep_study_report = per_peptide_study_report(report) + out_mztab_PEH = out_mztab_PEH.merge(pep_study_report, on="pr_id", how="left", validate="one_to_one", copy=True) + del pep_study_report logger.debug("Getting peptide properties") out_mztab_PEH[ @@ -929,7 +926,7 @@ def add_info(target, index_ref): :return: A tuple contains ms_run and study_variable :rtype: tuple """ - match = index_ref[index_ref["run"] == target] + match = index_ref[index_ref["Run"] == target] ms_run = match["ms_run"].values[0] study_variable = match["study_variable"].values[0] @@ -1054,6 +1051,7 @@ class ModScoreLooker: :param report: Dataframe for Dia-NN main report :type report: pandas.core.frame.DataFrame """ + def __init__(self, report: pd.DataFrame) -> None: self.lookup_dict = self.make_lookup_dict(report) @@ -1077,7 +1075,7 @@ def make_lookup_dict(self, report) -> Dict[str, Tuple[str, float]]: def get_score(self, protein_id: str) -> float: """Returns a tuple contains modified sequences and the score at protein level. 
- + Gets the best score and corresponding peptide for a given protein_id Note that protein id can be something like 'Q8IV63;Q8IV63-2;Q8IV63-3' @@ -1119,17 +1117,19 @@ def PEH_match_report(report, target): return search_score, time, q_score, spec_e, mz + # Pre-compiling the regex makes the next function 2x faster # in myu benchmarking - JSPP MODIFICATION_PATTERN = re.compile(r"\((.*?)\)") + def find_modification(peptide): """ Identify the modification site based on the peptide containing modifications. :param peptide: Sequences of peptides :type peptide: str - :return: Modification sites + :return: Modification sites :rtype: str Examples: @@ -1149,7 +1149,7 @@ def find_modification(peptide): original_mods[k] = str(position[k]) + "-" + original_mods[k].upper() original_mods = ",".join(str(i) for i in original_mods) if len(original_mods) > 0 else "null" - + return original_mods @@ -1174,6 +1174,122 @@ def calculate_mz(seq, charge): return AASequence.fromString(seq).getMZ(int(charge)) +def name_mapper_builder(subname_mapper): + """Returns a function that renames the columns of the grouped table to match the ones + in the final table. + + Examples: + >>> mapping_dict = { + ... "precursor.Index::::": "pr_id", + ... "Precursor.Normalised::mean": "peptide_abundance_study_variable" + ... } + >>> name_mapper = name_mapper_builder(mapping_dict) + >>> name_mapper("precursor.Index::::") + "pr_id" + >>> name_mapper("Precursor.Normalised::mean::1") + "peptide_abundance_study_variable[1]" + """ + num_regex = re.compile(r"(.*)::(\d+)$") + + def name_mapper(x): + """Renames the columns of the grouped table to match the ones + in the final table. + + Examples: + >>> name_mapper("precursor.Index::::") + "pr_id" + >>> name_mapper("Precursor.Normalised::mean::1") + "peptide_abundance_study_variable[1]" + """ + orig_x = x + for k, v in subname_mapper.items(): + if k in x: + x = x.replace(k, v) + out = num_regex.sub(r"\1[\2]", x) + if out == orig_x: + # This should never happen but I am adding it here + # to prevent myself from shoting myself in the foot in the future. + raise ValueError(f"Column name {x} not found in subname_mapper") + return out + + return name_mapper + + +def per_peptide_study_report(report: pd.DataFrame) -> pd.DataFrame: + """Summarizes the report at peptide/study level and flattens the columns. + + This function was implemented to replace an 'apply -> filter' approach. + In my benchmarking it went from 35.23 seconds for 4 samples, 4 conditions to + 0.007 seconds. + + This implementation differs in several aspects in the output values: + 1. in the fact that it actually gets values for the m/z + 2. always returns a float, whilst the apply version returns an 'object' dtype. + 3. The original implementation, missing values had the string 'null', here + they have the value np.nan. + 4. The order of the final output is different; the original orders columns by + study variables > calculated value, this one is calculated value > study variables. + + Calculates the mean, standard deviation and std error of the precursor + abundances, as well as the mean retention time and m/z. + + The names in the end are called "peptide" but thechnically the are at the + precursor level. (peptide+charge combinations). + + The columns will look like this in the end: + [ + 'pr_id', + 'peptide_abundance_study_variable[1]', + ... + 'peptide_abundance_stdev_study_variable[1]', + ... + 'peptide_abundance_std_error_study_variable[1]', + ... + 'opt_global_retention_time_study_variable[1]', + ... 
+ 'opt_global_mass_to_charge_study_variable[1]', + ... + ] + """ + pep_study_grouped = ( + report.groupby(["study_variable", "precursor.Index"]) + .agg({"Precursor.Normalised": ["mean", "std", "sem"], "RT.Start": ["mean"], "Calculate.Precursor.Mz": ["mean"]}) + .reset_index() + .pivot(columns=["study_variable"], index="precursor.Index") + .reset_index() + ) + pep_study_grouped.columns = ["::".join([str(s) for s in col]).strip() for col in pep_study_grouped.columns.values] + # Columns here would be like: + # [ + # "precursor.Index::::", + # "Precursor.Normalised::mean::1", + # "Precursor.Normalised::mean::2", + # "Precursor.Normalised::std::1", + # "Precursor.Normalised::std::2", + # "Precursor.Normalised::sem::1", + # "Precursor.Normalised::sem::2", + # "RT.Start::mean::1", + # "RT.Start::mean::2", + # ] + # So the right names need to be given and the table can be joined with the other one + subname_mapper = { + "precursor.Index::::": "pr_id", + "Precursor.Normalised::mean": "peptide_abundance_study_variable", + "Precursor.Normalised::std": "peptide_abundance_stdev_study_variable", + "Precursor.Normalised::sem": "peptide_abundance_std_error_study_variable", + "Calculate.Precursor.Mz::mean": "opt_global_mass_to_charge_study_variable", + "RT.Start::mean": "opt_global_retention_time_study_variable", + } + name_mapper = name_mapper_builder(subname_mapper) + + pep_study_grouped.rename( + columns=name_mapper, + inplace=True, + ) + + return pep_study_grouped + + cli.add_command(convert) if __name__ == "__main__": From 786798fa1bd61ddc2fb44bd80a7f7f9ec9492492 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 9 Aug 2023 11:24:47 -0700 Subject: [PATCH 023/113] speed and logging improvement --- bin/diann_convert.py | 241 +++++++++++++++++++++++++++---------------- 1 file changed, 151 insertions(+), 90 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index df309fac..876a06e7 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -11,7 +11,7 @@ import re from dataclasses import dataclass from pathlib import Path -from typing import Any, List, Tuple, Dict +from typing import Any, List, Tuple, Dict, Set from functools import lru_cache import click @@ -24,8 +24,9 @@ pd.set_option("display.width", 1000) CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) +REVISION = "0.1.1" -logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.DEBUG) +logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -64,6 +65,8 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c :param qvalue_threshold: Threshold for filtering q value :type qvalue_threshold: float """ + logger.debug(f"Revision {REVISION}") + logger.debug("Reading input files...") diann_directory = DiannDirectory(folder, diann_version_file=diann_version) report = diann_directory.main_report_df(qvalue_threshold=qvalue_threshold) s_DataFrame, f_table = get_exp_design_dfs(exp_design) @@ -78,6 +81,7 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c "Run", ] + logger.debug("Converting to MSstats format...") out_msstats = report[msstats_columns_keep] out_msstats.columns = ["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Reference", "Run"] out_msstats = out_msstats[out_msstats["Intensity"] != 0] @@ -566,6 +570,12 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): :rtype: pandas.core.frame.DataFrame """ logger.info("Constructing PRH 
sub-table...") + logger.debug( + f"Input report shape: {report.shape}," + f" input pg shape: {pg.shape}," + f" input index_ref shape: {index_ref.shape}," + f" input fasta_df shape: {fasta_df.shape}" + ) file = list(pg.columns[5:]) col = {} for i in file: @@ -576,7 +586,8 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): pg.rename(columns=col, inplace=True) logger.debug("Classifying results type ...") - pg.loc[:, "opt_global_result_type"] = pg.apply(classify_result_type, axis=1, result_type="expand") + pg["opt_global_result_type"] = "single_protein" + pg.loc[pg["Protein.Ids"].str.contains(";"), "opt_global_result_type"] = "indistinguishable_protein_group" out_mztab_PRH = pd.DataFrame() out_mztab_PRH = pg.drop(["Protein.Names"], axis=1) @@ -613,13 +624,16 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): logger.debug("Calculating protein coverage (bottleneck)...") # This is a bottleneck - out_mztab_PRH.loc[:, "protein_coverage"] = out_mztab_PRH.apply( - lambda x: calculate_protein_coverage(report, x["accession"], x["Protein.Ids"], fasta_df), - axis=1, - result_type="expand", + # reimplementation runs in 67s vs 137s (old) in my data + out_mztab_PRH.loc[:, "protein_coverage"] = calculate_protein_coverages( + report=report, out_mztab_PRH=out_mztab_PRH, fasta_df=fasta_df ) logger.debug("Getting ambiguity members...") + # IN THEORY this should be the same as + # out_mztab_PRH["ambiguity_members"] = out_mztab_PRH["Protein.Ids"] + # out_mztab_PRH.loc[out_mztab_PRH["opt_global_result_type"] == "single_protein", "ambiguity_members"] = "null" + # or out_mztab_PRH.loc[out_mztab_PRH["Protein.Ids"] == out_mztab_PRH["accession"], "ambiguity_members"] = "null" out_mztab_PRH.loc[:, "ambiguity_members"] = out_mztab_PRH.apply( lambda x: x["Protein.Ids"] if x["opt_global_result_type"] == "indistinguishable_protein_group" else "null", axis=1, @@ -682,7 +696,9 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): return out_mztab_PRH -def mztab_PEH(report, pr, precursor_list, index_ref, database): +def mztab_PEH( + report: pd.DataFrame, pr: pd.DataFrame, precursor_list: List[str], index_ref: pd.DataFrame, database: os.PathLike +) -> pd.DataFrame: """ Construct PEH sub-table. 
@@ -700,6 +716,12 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): :rtype: pandas.core.frame.DataFrame """ logger.info("Constructing PEH sub-table...") + logger.debug( + f"report.shape: {report.shape}, " + f" pr.shape: {pr.shape}," + f" len(precursor_list): {len(precursor_list)}," + f" index_ref.shape: {index_ref.shape}" + ) out_mztab_PEH = pd.DataFrame() out_mztab_PEH = pr.iloc[:, 0:10] out_mztab_PEH.drop( @@ -743,7 +765,6 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): ) logger.debug("Done Matching precursor IDs...") max_assay = max(index_ref["ms_run"]) - max_study_variable = max(index_ref["study_variable"]) logger.debug("Getting scores per run (bottleneck)") ms_run_score = [] @@ -759,17 +780,42 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): out_mztab_PEH = out_mztab_PEH.merge(pep_study_report, on="pr_id", how="left", validate="one_to_one", copy=True) del pep_study_report - logger.debug("Getting peptide properties") - out_mztab_PEH[ - [ - "best_search_engine_score[1]", - "retention_time", - "opt_global_q-value", - "opt_global_SpecEValue_score", - "mass_to_charge", - ] - ] = out_mztab_PEH.apply(lambda x: PEH_match_report(report, x["pr_id"]), axis=1, result_type="expand") + logger.debug("Getting peptide properties...") + # Re-implementing this section from apply -> assign to groupby->agg + # speeds up the process from 11s to 25ms in my data (~440x faster) + # Notably, this changes slightly... + # "opt_global_q-value" was the FIRST "Global.Q.Value", now its the min + # "opt_global_SpecEValue_score" was the FIRST "Lib.Q.Value" now its the min + # I believe picking the first is inconsistent because no sorting is checked + # and the first is arbitrary. + + aggtable = ( + report.groupby(["precursor.Index"]) + .agg( + { + "Q.Value": "min", + "RT.Start": "mean", + "Global.Q.Value": "min", + "Lib.Q.Value": "min", + "Calculate.Precursor.Mz": "mean", + } + ) + .reset_index() + .rename( + columns={ + "precursor.Index": "pr_id", + "Q.Value": "best_search_engine_score[1]", + "RT.Start": "retention_time", + "Global.Q.Value": "opt_global_q-value", + "Lib.Q.Value": "opt_global_SpecEValue_score", + "Calculate.Precursor.Mz": "mass_to_charge", + } + ) + ) + del out_mztab_PEH["mass_to_charge"] + out_mztab_PEH = out_mztab_PEH.merge(aggtable, on="pr_id", validate="one_to_one") + logger.debug("Re-ordering columns...") out_mztab_PEH.loc[:, "PEH"] = "PEP" out_mztab_PEH.loc[:, "database"] = database index = out_mztab_PEH.loc[:, "PEH"] @@ -780,7 +826,6 @@ def mztab_PEH(report, pr, precursor_list, index_ref, database): col for col in out_mztab_PEH.columns if col.startswith("opt_") ] out_mztab_PEH = out_mztab_PEH[new_cols] - # out_mztab_PEH.to_csv("./out_peptide.mztab", sep=",", index=False) return out_mztab_PEH @@ -946,53 +991,6 @@ def classify_result_type(target): return "single_protein" -def calculate_protein_coverage(report, target, reference, fasta_df): - """ - Calculate protein coverage. 
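
The aggtable change above follows the one recipe this patch applies throughout: compute
all per-group statistics in a single groupby().agg(), rename to the mzTab column names,
then merge back on the key. A compact, self-contained sketch with toy values:

import pandas as pd

report = pd.DataFrame({"precursor.Index": [0, 0, 1], "Q.Value": [0.02, 0.01, 0.03], "RT.Start": [10.0, 12.0, 30.0]})
agg = (
    report.groupby("precursor.Index")
    .agg({"Q.Value": "min", "RT.Start": "mean"})
    .reset_index()
    .rename(columns={"precursor.Index": "pr_id", "Q.Value": "best_search_engine_score[1]", "RT.Start": "retention_time"})
)
peh = pd.DataFrame({"pr_id": [0, 1]}).merge(agg, on="pr_id", validate="one_to_one")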
- - :param report: Dataframe for Dia-NN main report - :type report: pandas.core.frame.DataFrame - :param target: The value of "accession" column in out_mztab_PRH - :type target: str - :param fasta_df: A dataframe contains protein IDs, sequences and lengths - :type fasta_df: pandas.core.frame.DataFrame - :return: Protein coverage - :rtype: str - """ - peptide_list = report[report["Protein.Ids"] == reference]["Stripped.Sequence"].drop_duplicates().values - unique_peptides = [j for i, j in enumerate(peptide_list) if all(j not in k for k in peptide_list[i + 1 :])] - resultlist = [] - ref = fasta_df[fasta_df["id"].str.contains(target)]["seq"].values[0] - - def findstr(basestr, s, resultlist): - result = re.finditer(s, basestr) - if result: - for i in result: - resultlist.append([i.span()[0], i.span()[1] - 1]) - - return resultlist - - for i in unique_peptides: - resultlist = findstr(ref, i, resultlist) - # Sort and merge the interval list - resultlist.sort() - left, right = 0, 1 - while right < len(resultlist): - x1, y1 = resultlist[left][0], resultlist[left][1] - x2, y2 = resultlist[right][0], resultlist[right][1] - if x2 > y1: - left += 1 - right += 1 - else: - resultlist[left] = [x1, max(y1, y2)] - resultlist.pop(right) - - coverage_length = np.array([i[1] - i[0] + 1 for i in resultlist]).sum() - protein_coverage = format(coverage_length / len(ref), ".3f") - - return protein_coverage - - def match_in_report(report, target, max_, flag, level): """ This function is used to match the columns "ms_run" and "study_variable" from the report and @@ -1096,30 +1094,8 @@ def get_score(self, protein_id: str) -> float: return val -def PEH_match_report(report, target): - """ - Returns a tuple contains the score at peptide level, retain time, q_score, spec_e and mz. - - :param report: Dataframe for Dia-NN main report - :type report: pandas.core.frame.DataFrame - :param target: The value of "pr_id" column in report - :type target: str - :return: A tuple contains multiple information to construct PEH sub-table - :rtype: tuple - """ - match = report[report["precursor.Index"] == target] - ## Score at peptide level: the minimum of the respective precursor q-values (minimum of Q.Value per group) - search_score = match["Q.Value"].min() - time = match["RT.Start"].mean() - q_score = match["Global.Q.Value"].values[0] if match["Global.Q.Value"].values.size > 0 else np.nan - spec_e = match["Lib.Q.Value"].values[0] if match["Lib.Q.Value"].values.size > 0 else np.nan - mz = match["Calculate.Precursor.Mz"].mean() - - return search_score, time, q_score, spec_e, mz - - # Pre-compiling the regex makes the next function 2x faster -# in myu benchmarking - JSPP +# in my benchmarking - JSPP MODIFICATION_PATTERN = re.compile(r"\((.*?)\)") @@ -1290,6 +1266,91 @@ def per_peptide_study_report(report: pd.DataFrame) -> pd.DataFrame: return pep_study_grouped +def calculate_coverage(ref_sequence: str, sequences: Set[str]): + """ + Calculates the coverage of the reference sequence by the given sequences. 
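+
+    Occurrences of every sequence are located with str.find (overlapping hits
+    included, since the search restarts one character after each match), the hit
+    intervals are sorted and merged into disjoint spans, and the coverage is the
+    summed span length divided by len(ref_sequence).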
+
+    Examples:
+    >>> calculate_coverage("WATEROVERTHEDUCKSBACK", {"WATER", "DUCK"})
+    0.42857142857142855
+    >>> calculate_coverage("DUCKDUCKDUCK", {"DUCK"})
+    1.0
+    >>> calculate_coverage("WATEROVERTHEDUCK", {"DUCK"})
+    0.25
+    """
+    starts = []
+    lengths = []
+    for sequence in sequences:
+        local_start = 0
+        while True:
+            local_start = ref_sequence.find(sequence, local_start)
+            if local_start == -1:
+                break
+            starts.append(local_start)
+            lengths.append(len(sequence))
+            local_start += 1
+
+    # merge overlapping intervals
+    starts, lengths = zip(*sorted(zip(starts, lengths)))
+    merged_starts = []
+    merged_lengths = []
+    for start, length in zip(starts, lengths):
+        if merged_starts and merged_starts[-1] + merged_lengths[-1] >= start:
+            merged_lengths[-1] = max(merged_starts[-1] + merged_lengths[-1], start + length) - merged_starts[-1]
+        else:
+            merged_starts.append(start)
+            merged_lengths.append(length)
+
+    # calculate coverage
+    coverage = sum(merged_lengths) / len(ref_sequence)
+    return coverage
+
+
+def calculate_protein_coverages(report: pd.DataFrame, out_mztab_PRH: pd.DataFrame, fasta_df: pd.DataFrame) -> List[str]:
+    """Calculates protein coverages for the PRH table.
+
+    The protein coverage is calculated as the fraction of the protein sequence
+    in the fasta df, covered by the peptides in the report table, for every
+    protein in the PRH table (defined by accession, not protein.ids).
+    """
+    nested_df = (
+        report[["Protein.Ids", "Stripped.Sequence"]]
+        .groupby("Protein.Ids")
+        .agg({"Stripped.Sequence": set})
+        .reset_index()
+    )
+    #                   Protein.Ids                                  Stripped.Sequence
+    # 0  A0A024RBG1;Q9NZJ9;Q9NZJ9-2                                   {SEQEDEVLLVSSSR}
+    # 1     A0A096LP49;A0A096LP49-2                                  {SPWAMTERKHSSLER}
+    # 2             A0AVT1;A0AVT1-2  {EDFTLLDFINAVK, KPDHVPISSEDER, QDVIITALDNVEAR,...
+    ids_to_seqs = dict(zip(nested_df["Protein.Ids"], nested_df["Stripped.Sequence"]))
+    acc_to_ids = dict(zip(out_mztab_PRH["accession"], out_mztab_PRH["Protein.Ids"]))
+    fasta_id_to_seqs = dict(zip(fasta_df["id"], fasta_df["seq"]))
+    acc_to_fasta_ids = {}
+
+    # Since fasta ids are something like sp|P51451|BLK_HUMAN but
+    # accessions are something like Q9Y6V7-2, we need to find a
+    # partial string match between the two (the best one)
+    for acc in acc_to_ids:
+        # I am pretty sure this is the slowest part of the code
+        matches = fasta_df[fasta_df["id"].str.contains(acc)]["id"]
+        if len(matches) == 0:
+            acc_to_fasta_ids[acc] = None
+        elif len(matches) == 1:
+            acc_to_fasta_ids[acc] = matches.iloc[0]
+        else:
+            # If multiple, find best match. e.g. Pick Q9Y6V7 over Q9Y6V7-2
+            # This can be acquired by finding the shortest string, since
+            # it entails more un-matched characters.
+            acc_to_fasta_ids[acc] = min(matches, key=len)
+
+    out = [
+        format(calculate_coverage(fasta_id_to_seqs[acc_to_fasta_ids[acc]], ids_to_seqs[acc_to_ids[acc]]), ".03f")
+        for acc in out_mztab_PRH["accession"]
+    ]
+    return out
+
+
 cli.add_command(convert)
 
 if __name__ == "__main__":

From ec37001cf40d983d17e2b914d95320067e347bae Mon Sep 17 00:00:00 2001
From: "J.
Sebastian Paez" Date: Wed, 9 Aug 2023 13:37:39 -0700 Subject: [PATCH 024/113] improved error messaging when calculating coverages --- bin/diann_convert.py | 17 +++++++++++++---- pyproject.toml | 3 +++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 876a06e7..63f39353 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -1335,6 +1335,7 @@ def calculate_protein_coverages(report: pd.DataFrame, out_mztab_PRH: pd.DataFram # I am pretty sure this is the slowest part of the code matches = fasta_df[fasta_df["id"].str.contains(acc)]["id"] if len(matches) == 0: + logger.warning(f"Could not find fasta id for accession {acc} in the fasta file.") acc_to_fasta_ids[acc] = None elif len(matches) == 1: acc_to_fasta_ids[acc] = matches.iloc[0] @@ -1344,10 +1345,18 @@ def calculate_protein_coverages(report: pd.DataFrame, out_mztab_PRH: pd.DataFram # it entails more un-matched characters. acc_to_fasta_ids[acc] = min(matches, key=len) - out = [ - format(calculate_coverage(fasta_id_to_seqs[acc_to_fasta_ids[acc]], ids_to_seqs[acc_to_ids[acc]]), ".03f") - for acc in out_mztab_PRH["accession"] - ] + out = [None] * len(out_mztab_PRH["accession"]) + + for i, acc in enumerate(out_mztab_PRH["accession"]): + f_id = acc_to_fasta_ids[acc] + if f_id is None: + out_cov = "null" + else: + cov = calculate_coverage(fasta_id_to_seqs[f_id], ids_to_seqs[acc_to_ids[acc]]) + out_cov = format(cov, ".03f") + + out[i] = out_cov + return out diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..20551e61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,3 +8,6 @@ target_version = ["py37", "py38", "py39", "py310"] profile = "black" known_first_party = ["nf_core"] multi_line_output = 3 + +[tool.ruff] +line-length = 120 \ No newline at end of file From 882b968b2dddc9121a41b13866845950bcae645a Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Wed, 9 Aug 2023 18:34:01 -0700 Subject: [PATCH 025/113] further optimization --- bin/diann_convert.py | 79 +++++++++++--------------------------------- 1 file changed, 20 insertions(+), 59 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 63f39353..4be7caf6 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -12,7 +12,6 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, List, Tuple, Dict, Set -from functools import lru_cache import click import numpy as np @@ -108,11 +107,20 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c ## END TODO logger.debug("Adding Fraction, BioReplicate, Condition columns") - design_looker = ExpDesignLooker(f_table=f_table, s_table=s_DataFrame) - out_msstats[["Fraction", "BioReplicate", "Condition"]] = out_msstats.apply( - lambda x: design_looker.query_expdesign_value(x["Run"]), axis=1, result_type="expand" + # Changing implementation from apply to merge went from several minutes to + # ~50ms + tmp = ( + s_DataFrame[["Sample", "MSstats_Condition", "MSstats_BioReplicate"]] + .merge(f_table[["Fraction", "Sample", "run"]], on="Sample") + .rename(columns={"run": "Run", "MSstats_BioReplicate": "BioReplicate", "MSstats_Condition": "Condition"}) + .drop(columns=["Sample"]) ) - del design_looker + out_msstats = out_msstats.merge( + tmp, + on="Run", + validate="many_to_one", + ) + del tmp exp_out_prefix = str(Path(exp_design).stem) out_msstats.to_csv(exp_out_prefix + "_msstats_in.csv", sep=",", index=False) logger.info(f"MSstats input file is saved as {exp_out_prefix}_msstats_in.csv") @@ -356,52 +364,6 @@ def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: return report -@dataclass -class ExpDesignLooker: - """Caches the lookup of values in the experimetal design table.""" - - f_table: pd.DataFrame - s_table: pd.DataFrame - - def __hash__(self): - # This is not a perfect hash function but it will work - # for our use case, since we are not going to change - # the content of f_table and s_table - - # I am using this over a strict hash for performance reasons - # since the hash is calculated every time a method with cache - # is called. - hash_v = hash(self.f_table.values.shape) + hash(self.s_table.values.shape) - return hash_v - - @lru_cache(maxsize=128) - def query_expdesign_value(self, reference): - """ - By matching the "Run" column in f_table or the "Sample" column in s_table, this function - returns a tuple containing Fraction, BioReplicate and Condition. - - :param reference: The value of "Run" column in out_msstats - :type reference: str - :param f_table: A table contains experiment settings(search engine settings etc.) 
- :type f_table: pandas.core.frame.DataFrame - :param s_table: A table contains experimental design - :type s_table: pandas.core.frame.DataFrame - :return: A tuple contains Fraction, BioReplicate and Condition - :rtype: tuple - """ - f_table = self.f_table - s_table = self.s_table - if reference not in f_table["run"].values: - raise ValueError(f"Reference {reference} not found in f_table;" f" values are {set(f_table['run'].values)}") - - query_reference = f_table[f_table["run"] == reference] - Fraction = query_reference["Fraction"].values[0] - row = s_table[s_table["Sample"] == query_reference["Sample"].values[0]] - BioReplicate = row["MSstats_BioReplicate"].values[0] - Condition = row["MSstats_Condition"].values[0] - - return Fraction, BioReplicate, Condition - def MTD_mod_info(fix_mod, var_mod): """ @@ -757,16 +719,15 @@ def mztab_PEH( out_mztab_PEH.loc[:, i] = "null" out_mztab_PEH.loc[:, "opt_global_cv_MS:1002217_decoy_peptide"] = "0" - logger.debug("Matching precursor IDs... (botleneck)") - ## average value of each study_variable - ## quantity at peptide level: Precursor.Normalised - out_mztab_PEH.loc[:, "pr_id"] = out_mztab_PEH.apply( - lambda x: precursor_list.index(x["Precursor.Id"]), axis=1, result_type="expand" - ) - logger.debug("Done Matching precursor IDs...") - max_assay = max(index_ref["ms_run"]) + logger.debug("Matching precursor IDs...") + # Pre-calculating the indices and using a lookup table drops run time from + # ~6.5s to 11ms + precursor_indices = {k:i for i, k in enumerate(precursor_list)} + pr_ids = out_mztab_PEH["Precursor.Id"].apply(lambda x: precursor_indices[x]) + out_mztab_PEH["pr_id"] = pr_ids logger.debug("Getting scores per run (bottleneck)") + max_assay = max(index_ref["ms_run"]) ms_run_score = [] for i in range(1, max_assay + 1): ms_run_score.append("search_engine_score[1]_ms_run[" + str(i) + "]") From 69ae5603d19b78a9650b2ae53f9e67d266ae65d9 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 11 Aug 2023 14:08:46 -0700 Subject: [PATCH 026/113] changed paths to vals --- .../local/assemble_empirical_library/main.nf | 7 +++++-- modules/local/diannconvert/main.nf | 1 + modules/local/diannsummary/main.nf | 17 ++++++++++++----- workflows/dia.nf | 7 ++++--- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/modules/local/assemble_empirical_library/main.nf b/modules/local/assemble_empirical_library/main.nf index e2803773..9ee7cf73 100644 --- a/modules/local/assemble_empirical_library/main.nf +++ b/modules/local/assemble_empirical_library/main.nf @@ -7,7 +7,7 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { 'biocontainers/diann:v1.8.1_cv1' }" input: - path(mzMLs) + val(ms_files) val(meta) path("quant/*") path(lib) @@ -29,7 +29,10 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { scan_window = params.scan_window_automatic ? 
"--individual-windows" : "--window $params.scan_window" """ - diann --f ${(mzMLs as List).join(' --f ')} \\ + + ls -lcth + + diann --f ${(ms_files as List).join(' --f ')} \\ --lib ${lib} \\ --threads ${task.cpus} \\ --out-lib empirical_library.tsv \\ diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf index ceab2bde..3ef0f650 100644 --- a/modules/local/diannconvert/main.nf +++ b/modules/local/diannconvert/main.nf @@ -23,6 +23,7 @@ process DIANNCONVERT { path "*msstats_in.csv", emit: out_msstats path "*triqler_in.tsv", emit: out_triqler path "*.mzTab", emit: out_mztab + path "*.log", emit: log path "versions.yml", emit: version exec: diff --git a/modules/local/diannsummary/main.nf b/modules/local/diannsummary/main.nf index b37f614d..b84e216b 100644 --- a/modules/local/diannsummary/main.nf +++ b/modules/local/diannsummary/main.nf @@ -7,11 +7,11 @@ process DIANNSUMMARY { 'biocontainers/diann:v1.8.1_cv1' }" input: - file(mzMLs) + val(ms_files) val(meta) - file(empirical_library) - file("quant/") - file(fasta) + path(empirical_library) + path("quant/") + path(fasta) output: path "diann_report.tsv", emit: main_report @@ -35,9 +35,16 @@ process DIANNSUMMARY { species_genes = params.species_genes ? "--species-genes": "" """ + # Adding here for inspection purposes + ls -lcth + # Notes: if .quant files are passed, mzml/.d files are not accessed, so the name needs to be passed but files + # do not need to pe present. + + # end, remove when done inspecting. + diann --lib ${empirical_library} \\ --fasta ${fasta} \\ - --f ${(mzMLs as List).join(' --f ')} \\ + --f ${(ms_files as List).join(' --f ')} \\ --threads ${task.cpus} \\ --verbose $params.diann_debug \\ ${scan_window} \\ diff --git a/workflows/dia.nf b/workflows/dia.nf index de1b4035..61f7c7ca 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -43,7 +43,7 @@ workflow DIA { ch_file_preparation_results.multiMap { meta: preprocessed_meta(it[0]) - mzml: it[1] + ms_file: it[1] } .set { ch_result } @@ -72,7 +72,7 @@ workflow DIA { // // MODULE: ASSEMBLE_EMPIRICAL_LIBRARY // - ASSEMBLE_EMPIRICAL_LIBRARY(ch_result.mzml.collect(), + ASSEMBLE_EMPIRICAL_LIBRARY(ch_result..collect(), meta, DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(), speclib @@ -88,7 +88,8 @@ workflow DIA { // // MODULE: DIANNSUMMARY // - DIANNSUMMARY(ch_result.mzml.collect(), meta, ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library, + ms_file_names = ch_result.ms_file.map{ msfile -> msfile.getName() }.collect() + DIANNSUMMARY(ms_file_names, meta, ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library, INDIVIDUAL_FINAL_ANALYSIS.out.diann_quant.collect(), ch_searchdb) ch_software_versions = ch_software_versions.mix(DIANNSUMMARY.out.version.ifEmpty(null)) From 266c12169c87c48d7670ec3b4eb74e8f9c6f5a7c Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 11 Aug 2023 15:08:13 -0700 Subject: [PATCH 027/113] typo fix --- workflows/dia.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/dia.nf b/workflows/dia.nf index 61f7c7ca..31198c74 100644 --- a/workflows/dia.nf +++ b/workflows/dia.nf @@ -72,7 +72,7 @@ workflow DIA { // // MODULE: ASSEMBLE_EMPIRICAL_LIBRARY // - ASSEMBLE_EMPIRICAL_LIBRARY(ch_result..collect(), + ASSEMBLE_EMPIRICAL_LIBRARY(ch_result.ms_file.collect(), meta, DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(), speclib From 4d94097c2f54bf0571bd63d4c82a35a6bbc5a229 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 11 Aug 2023 17:34:50 -0700 Subject: [PATCH 028/113] even more optimization in diann conversion --- bin/diann_convert.py | 55 +++++++++++++++++++++--------- modules/local/diannsummary/main.nf | 4 +++ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 4be7caf6..374507b4 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd from pyopenms import AASequence, FASTAFile, ModificationsDB +from pyopenms.Constants import PROTON_MASS_U pd.set_option("display.max_rows", 500) pd.set_option("display.max_columns", 500) @@ -355,14 +356,22 @@ def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: # filter based on qvalue parameter for downstream analysiss report = report[report["Q.Value"] < qvalue_threshold] - report["Calculate.Precursor.Mz"] = report.apply( - lambda x: calculate_mz(x["Stripped.Sequence"], x["Precursor.Charge"]), axis=1 - ) - precursor_list = list(report["Precursor.Id"].unique()) - report["precursor.Index"] = report.apply(lambda x: precursor_list.index(x["Precursor.Id"]), axis=1) - return report + logger.debug("Calculating Precursor.Mz") + # Making the map is 10x faster, and includes the mass of + # the modification. with respect to the previous implementation. + uniq_masses = {k: AASequence.fromString(k).getMonoWeight() for k in report["Modified.Sequence"].unique()} + mass_vector = report["Modified.Sequence"].map(uniq_masses) + report["Calculate.Precursor.Mz"] = (mass_vector + (PROTON_MASS_U * report["Precursor.Charge"])) / report[ + "Precursor.Charge" + ] + logger.debug("Indexing Precursors") + # Making the map is 1500x faster + precursor_index_map = {k: i for i, k in enumerate(report["Precursor.Id"].unique())} + report["precursor.Index"] = report["Precursor.Id"].map(precursor_index_map) + + return report def MTD_mod_info(fix_mod, var_mod): @@ -722,19 +731,33 @@ def mztab_PEH( logger.debug("Matching precursor IDs...") # Pre-calculating the indices and using a lookup table drops run time from # ~6.5s to 11ms - precursor_indices = {k:i for i, k in enumerate(precursor_list)} - pr_ids = out_mztab_PEH["Precursor.Id"].apply(lambda x: precursor_indices[x]) + precursor_indices = {k: i for i, k in enumerate(precursor_list)} + pr_ids = out_mztab_PEH["Precursor.Id"].map(precursor_indices) out_mztab_PEH["pr_id"] = pr_ids + del precursor_indices - logger.debug("Getting scores per run (bottleneck)") - max_assay = max(index_ref["ms_run"]) - ms_run_score = [] - for i in range(1, max_assay + 1): - ms_run_score.append("search_engine_score[1]_ms_run[" + str(i) + "]") - - out_mztab_PEH[ms_run_score] = out_mztab_PEH.apply( - lambda x: match_in_report(report, x["pr_id"], max_assay, 0, "pep"), axis=1, result_type="expand" + logger.debug("Getting scores per run") + # This implementation is 422-700x faster than the apply-based one + tmp = ( + report.groupby(["precursor.Index", "ms_run"]) + .agg({"Q.Value": ["min"]}) + .reset_index() + .pivot(columns=["ms_run"], index="precursor.Index") + .reset_index() + ) + tmp.columns = ["::".join([str(s) for s in col]).strip() for col in tmp.columns.values] + subname_mapper = { + "precursor.Index::::": "precursor.Index", + "Q.Value::min": "search_engine_score[1]_ms_run", + } + name_mapper = name_mapper_builder(subname_mapper) + tmp.rename(columns=name_mapper, inplace=True) + out_mztab_PEH = out_mztab_PEH.merge( + tmp.rename(columns={"precursor.Index": "pr_id"}), on="pr_id", validate="one_to_one" ) + del tmp + del 
subname_mapper + del name_mapper logger.debug("Getting peptide abundances per study variable") pep_study_report = per_peptide_study_report(report) diff --git a/modules/local/diannsummary/main.nf b/modules/local/diannsummary/main.nf index b84e216b..1b1871e8 100644 --- a/modules/local/diannsummary/main.nf +++ b/modules/local/diannsummary/main.nf @@ -7,9 +7,13 @@ process DIANNSUMMARY { 'biocontainers/diann:v1.8.1_cv1' }" input: + // Note that the files are passed as names and not paths, this prevents them from being staged + // in the directory val(ms_files) val(meta) path(empirical_library) + // The quant path is passed, and diann will use the files in the quant directory instead + // of the ones passed in ms_files. path("quant/") path(fasta) From 41f76c292aa2c0942c0c4f81c4816c88fbc6533a Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 11 Aug 2023 17:55:12 -0700 Subject: [PATCH 029/113] added a bit of debug logging --- bin/diann_convert.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 374507b4..d948f225 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -85,6 +85,8 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c out_msstats = report[msstats_columns_keep] out_msstats.columns = ["ProteinName", "PeptideSequence", "PrecursorCharge", "Intensity", "Reference", "Run"] out_msstats = out_msstats[out_msstats["Intensity"] != 0] + + # Q: What is this line doing? out_msstats.loc[:, "PeptideSequence"] = out_msstats.apply( lambda x: AASequence.fromString(x["PeptideSequence"]).toString(), axis=1 ) @@ -371,6 +373,9 @@ def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: precursor_index_map = {k: i for i, k in enumerate(report["Precursor.Id"].unique())} report["precursor.Index"] = report["Precursor.Id"].map(precursor_index_map) + logger.debug(f"Shape of main report {report.shape}") + logger.debug(str(report.head())) + return report From 7ce33c45d226ce7cb69968b6c0f619e77bbe7916 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 11 Aug 2023 18:16:50 -0700 Subject: [PATCH 030/113] change to path in the empirical lib step and yet even more optimization --- bin/diann_convert.py | 12 ++++++++---- modules/local/assemble_empirical_library/main.nf | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index d948f225..84269b3c 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -90,10 +90,12 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c out_msstats.loc[:, "PeptideSequence"] = out_msstats.apply( lambda x: AASequence.fromString(x["PeptideSequence"]).toString(), axis=1 ) - out_msstats.loc[:, "FragmentIon"] = "NA" - out_msstats.loc[:, "ProductCharge"] = "0" - out_msstats.loc[:, "IsotopeLabelType"] = "L" - out_msstats["Reference"] = out_msstats.apply(lambda x: os.path.basename(x["Reference"]), axis=1) + out_msstats["FragmentIon"] = "NA" + out_msstats["ProductCharge"] = "0" + out_msstats["IsotopeLabelType"] = "L" + unique_reference_map = {k: os.path.basename(k) for k in out_msstats["Reference"].unique()} + out_msstats["Reference"] = out_msstats["Reference"].map(unique_reference_map) + del unique_reference_map # TODO remove this if not debugging logger.debug("\n\nReference Column >>>") @@ -357,7 +359,9 @@ def main_report_df(self, qvalue_threshold: float) -> pd.DataFrame: report = pd.read_csv(self.report, sep="\t", header=0, usecols=remain_cols) # filter based on qvalue parameter for downstream analysiss + logger.debug(f"Filtering report based on qvalue threshold: {qvalue_threshold}, {len(report)} rows") report = report[report["Q.Value"] < qvalue_threshold] + logger.debug(f"Report filtered, {len(report)} rows remaining") logger.debug("Calculating Precursor.Mz") # Making the map is 10x faster, and includes the mass of diff --git a/modules/local/assemble_empirical_library/main.nf b/modules/local/assemble_empirical_library/main.nf index 9ee7cf73..ac151032 100644 --- a/modules/local/assemble_empirical_library/main.nf +++ b/modules/local/assemble_empirical_library/main.nf @@ -7,7 +7,8 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { 'biocontainers/diann:v1.8.1_cv1' }" input: - val(ms_files) + // In this step the real files are passed, and not the names + path(ms_files) val(meta) path("quant/*") path(lib) From 85f306058263ee8697c42dbb28eebb5f1b6feacc Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 17 Aug 2023 05:00:42 -0700 Subject: [PATCH 031/113] Experimental/bruker report (#2) * added report info * split decompression step * Included ms1 TIC/BPC * added new data to report channel * added convert_dotd to the schema * fixed bug where passed mass accuracies were bypassed * code formatting --- assets/multiqc_config.yml | 37 +++ bin/dotd_2_mqc.py | 242 ++++++++++++++++++ .../local/assemble_empirical_library/main.nf | 16 +- modules/local/decompress_dotd/main.nf | 71 +++++ modules/local/decompress_dotd/meta.yml | 46 ++++ .../local/diann_preliminary_analysis/main.nf | 23 +- modules/local/diannsummary/main.nf | 4 +- modules/local/dotd_to_mqc/main.nf | 30 +++ modules/local/dotd_to_mqc/meta.yml | 0 modules/local/generate_diann_cfg/main.nf | 12 +- .../local/individual_final_analysis/main.nf | 6 +- modules/local/tdf2mzml/main.nf | 5 - nextflow.config | 3 + nextflow_schema.json | 6 + subworkflows/local/file_preparation.nf | 53 +++- workflows/dia.nf | 86 ++++--- workflows/quantms.nf | 1 + 17 files changed, 567 insertions(+), 74 deletions(-) create mode 100755 bin/dotd_2_mqc.py create mode 100644 modules/local/decompress_dotd/main.nf create mode 100644 modules/local/decompress_dotd/meta.yml create mode 100644 modules/local/dotd_to_mqc/main.nf create mode 100644 modules/local/dotd_to_mqc/meta.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index afd31f93..bc756b51 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -18,6 +18,43 @@ custom_logo: "./nf-core-quantms_logo_light.png" custom_logo_url: "https://github.com/bigbio/quantms" custom_logo_title: "quantms" +custom_data: + total_ion_chromatograms: + file_format: 'tsv' + section_name: 'MS1 TIC' + description: 'MS1 total ion chromatograms extracted from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_tic' + title: 'MS1 TIC' + ylab: 'Ion Count' + ymin: 0 + base_peak_chromatograms: + file_format: 'tsv' + section_name: 'MS1 BPC' + description: 'MS1 base peak chromatograms extracted from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_bpc' + title: 'MS1 BPC' + ylab: 'Ion Count' + ymin: 0 + number_of_peaks: + file_format: 'tsv' + section_name: 'MS1 Peaks' + description: 'MS1 Peaks from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_peaks' + title: 'MS1 Peaks' + ylab: 'Peak Count' + ymin: 0 sp: + total_ion_chromatograms: + fn: 'tic_*' + base_peak_chromatograms: + fn: 'bpc_*' + number_of_peaks: + fn: 'ms1_peaks_*' quantms/exp_design: fn: "*_design.tsv" diff --git a/bin/dotd_2_mqc.py b/bin/dotd_2_mqc.py new file mode 100755 index 00000000..011e302d --- /dev/null +++ b/bin/dotd_2_mqc.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python +GENERAL_HELP = """ +Converts .d files to multiqc compatible files. + +Generates the following files: + - tic_.tsv + - bpc_.tsv + - ms1_peaks_.tsv + - general_stats_.tsv + - dotd_mqc.yml + +Usage: + $ python dotd_2_mqc.py + $ cd + $ multiqc -c dotd_mqc.yml . 
+""" + +from typing import List, Tuple # noqa: E402 +import os # noqa: E402 +import sqlite3 # noqa: E402 +import argparse # noqa: E402 +from pathlib import Path # noqa: E402 +from dataclasses import dataclass # noqa: E402 +from logging import getLogger # noqa: E402 + +VERSION = "0.0.1" +logger = getLogger(__name__) + +MQC_YML = """ +custom_data: + total_ion_chromatograms: + file_format: 'tsv' + section_name: 'MS1 TIC' + description: 'MS1 total ion chromatograms extracted from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_tic' + title: 'MS1 TIC' + ylab: 'Ion Count' + ymin: 0 + base_peak_chromatograms: + file_format: 'tsv' + section_name: 'MS1 BPC' + description: 'MS1 base peak chromatograms extracted from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_bpc' + title: 'MS1 BPC' + ylab: 'Ion Count' + ymin: 0 + number_of_peaks: + file_format: 'tsv' + section_name: 'MS1 Peaks' + description: 'MS1 Peaks from the .d files' + plot_type: 'linegraph' + pconfig: + id: 'ms1_peaks' + title: 'MS1 Peaks' + ylab: 'Peak Count' + ymin: 0 +sp: + total_ion_chromatograms: + fn: 'tic_*' + base_peak_chromatograms: + fn: 'bpc_*' + number_of_peaks: + fn: 'ms1_peaks_*' + general_stats: + fn: 'general_stats_*' +""" + + +@dataclass +class DotDFile: + filepath: os.PathLike + + @property + def sql_filepath(self): + fp = Path(self.filepath) / "analysis.tdf" + return fp + + @property + def basename(self): + return Path(self.filepath).stem + + @property + def ms1_tic(self) -> List[Tuple[float, float]]: + """Gets the MS1 total-ion-chromatogram. + + Returns: + List[Tuple[float, float]]: List of (time, intensity) tuples. + """ + cmd = """ + SELECT CAST(Time AS INTEGER), AVG(SummedIntensities) + FROM frames WHERE MsMsType = '0' + GROUP BY CAST(Time AS INTEGER) + ORDER BY Time + """ + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + return out + + @property + def ms1_bpc(self) -> List[Tuple[float, float]]: + """Gets the MS1 base-peak-chromatogram. + + Returns: + List[Tuple[float, float]]: List of (time, intensity) tuples. + """ + cmd = """ + SELECT CAST(Time AS INTEGER), MAX(MaxIntensity) + FROM frames WHERE MsMsType = '0' + GROUP BY CAST(Time AS INTEGER) + ORDER BY Time + """ + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + return out + + @property + def ms1_peaks(self) -> List[Tuple[float, float]]: + """Gets the number of MS1 peaks. + + Returns: + List[Tuple[float, float]]: List of (time, intensity) tuples. + """ + cmd = """ + SELECT CAST(Time AS INTEGER), AVG(NumPeaks) + FROM frames WHERE MsMsType = '0' + GROUP BY CAST(Time AS INTEGER) + ORDER BY Time + """ + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + return out + + def get_acquisition_datetime(self) -> str: + """Gets the acquisition datetime + + Returns + ------- + str + The acquisition datetime in ISO 8601 format. + [('2023-08-06T06:23:19.141-08:00',)] + """ + cmd = "SELECT Value FROM GlobalMetadata WHERE key='AcquisitionDateTime'" + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + if not len(out) == 1: + raise RuntimeError("More than one acquisition datetime found.") + + return out[0][0] + + def get_general_stats(self) -> dict: + """Gets the general stats from the .d file. + + Returns + ------- + dict + A dictionary of general stats. 
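
            Example of the returned mapping (reusing the hypothetical
            timestamp shown in the docstring above):
            {"AcquisitionDateTime": "2023-08-06T06:23:19.141-08:00"}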
+ """ + out = { + "AcquisitionDateTime": self.get_acquisition_datetime(), + } + return out + + def write_tables(self, location): + logger.info(f"Writing tables for {self.basename}") + logger.info(f"Writing tables to {location}") + location = Path(location) + location.mkdir(parents=True, exist_ok=True) + tic = self.ms1_tic + bpc = self.ms1_bpc + npeaks = self.ms1_peaks + general_stats = self.get_general_stats() + general_stats["TotCurrent"] = sum([i for t, i in tic]) + + tic_path = location / f"tic_{self.basename}.tsv" + bpc_path = location / f"bpc_{self.basename}.tsv" + peaks_location = location / f"ms1_peaks_{self.basename}.tsv" + general_stats_location = location / f"general_stats_{self.basename}.tsv" + + logger.info(f"Writing {tic_path}") + with tic_path.open("w") as f: + for t, i in tic: + f.write(f"{t}\t{i}\n") + + logger.info(f"Writing {bpc_path}") + with bpc_path.open("w") as f: + for t, i in bpc: + f.write(f"{t}\t{i}\n") + + logger.info(f"Writing {peaks_location}") + with peaks_location.open("w") as f: + for t, i in npeaks: + f.write(f"{t}\t{i}\n") + + logger.info(f"Writing {general_stats_location}") + with general_stats_location.open("w") as f: + for k, v in general_stats.items(): + f.write(f"{k}\t{v}\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(add_help=True, usage=GENERAL_HELP) + parser.add_argument("input", help="Input .d file or directory of .d files.") + parser.add_argument("output", help="Output directory.") + parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") + + args, unkargs = parser.parse_known_args() + + if unkargs: + print(f"Unknown arguments: {unkargs}") + raise RuntimeError("Unknown arguments.") + + input_path = Path(args.input) + output_path = Path(args.output) + + if input_path.is_dir() and str(input_path).endswith(".d"): + input_files = [input_path] + elif input_path.is_dir(): + input_files = list(input_path.glob("*.d")) + else: + raise RuntimeError(f"Input path {input_path} is not a file or directory.") + + output_path.mkdir(parents=True, exist_ok=True) + + for f in input_files: + d = DotDFile(f) + d.write_tables(output_path) + + logger.info(f"Writing {output_path / 'dotd_mqc.yml'}") + with (output_path / "dotd_mqc.yml").open("w") as f: + f.write(MQC_YML) diff --git a/modules/local/assemble_empirical_library/main.nf b/modules/local/assemble_empirical_library/main.nf index ac151032..526454ef 100644 --- a/modules/local/assemble_empirical_library/main.nf +++ b/modules/local/assemble_empirical_library/main.nf @@ -23,13 +23,21 @@ process ASSEMBLE_EMPIRICAL_LIBRARY { script: def args = task.ext.args ?: '' - mass_acc_ms1 = meta.precursor_mass_tolerance_unit == "ppm" ? meta.precursor_mass_tolerance : 5 - mass_acc_ms2 = meta.fragment_mass_tolerance_unit == "ppm" ? meta.fragment_mass_tolerance : 13 + mass_acc_ms1 = meta['precursormasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['precursormasstolerance'] : 5 + mass_acc_ms2 = meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['fragmentmasstolerance'] : 13 - mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1" - scan_window = params.scan_window_automatic ? "--individual-windows" : "--window $params.scan_window" + if (params.mass_acc_automatic) { + mass_acc = "--quick-mass-acc --individual-mass-acc" + } else { + mass_acc = "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1" + } + scan_window = params.scan_window_automatic ? 
'--individual-windows' : "--window $params.scan_window" """ + # Precursor Tolerance value was: ${meta['precursormasstolerance']} + # Fragment Tolerance value was: ${meta['fragmentmasstolerance']} + # Precursor Tolerance unit was: ${meta['precursormasstoleranceunit']} + # Fragment Tolerance unit was: ${meta['fragmentmasstoleranceunit']} ls -lcth diff --git a/modules/local/decompress_dotd/main.nf b/modules/local/decompress_dotd/main.nf new file mode 100644 index 00000000..8b01fb43 --- /dev/null +++ b/modules/local/decompress_dotd/main.nf @@ -0,0 +1,71 @@ + +process DECOMPRESS { + tag "$meta.mzml_id" + label 'process_low' + label 'process_single' + label 'error_retry' + + container 'continuumio/miniconda3:23.5.2-0-alpine' + + stageInMode { + if (task.attempt == 1) { + if (executor == "awsbatch") { + 'symlink' + } else { + 'link' + } + } else if (task.attempt == 2) { + if (executor == "awsbatch") { + 'copy' + } else { + 'symlink' + } + } else { + 'copy' + } + } + + input: + tuple val(meta), path(compressed_file) + + output: + tuple val(meta), path("*.d"), emit: decompressed_files + path "versions.yml", emit: version + path "*.log", emit: log + + script: + def prefix = task.ext.prefix ?: "${meta.mzml_id}" + + """ + function extract { + if [ -z "\$1" ]; then + echo "Usage: extract ." + else + if [ -f \$1 ]; then + case \$1 in + *.tar.gz) tar xvzf \$1 ;; + *.gz) gunzip \$1 ;; + *.tar) tar xvf \$1 ;; + *) echo "extract: '\$1' - unknown archive method" ;; + esac + else + echo "\$1 - file does not exist" + fi + fi + } + + tar --help 2>&1 | tee -a ${prefix}_decompression.log + gunzip --help 2>&1 | tee -a ${prefix}_decompression.log + echo "Unpacking..." | tee -a ${compressed_file.baseName}_decompression.log + + extract ${compressed_file} 2>&1 | tee -a ${compressed_file.baseName}_conversion.log + mv *.d ${file(compressed_file.baseName).baseName}.d + ls -l | tee -a ${compressed_file.baseName}_decompression.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(gunzip --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?") + tar: \$(tar --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?") + END_VERSIONS + """ +} diff --git a/modules/local/decompress_dotd/meta.yml b/modules/local/decompress_dotd/meta.yml new file mode 100644 index 00000000..021c09a7 --- /dev/null +++ b/modules/local/decompress_dotd/meta.yml @@ -0,0 +1,46 @@ +name: decompression +description: Decompress .tar/.gz files that contain a .d file/directory +keywords: + - raw + - bruker + - .d +tools: + - tar: + description: | + Generates and extracts archives. + homepage: https://www.gnu.org/software/tar/ + - gunzip: + description: | + Decompresses using zlib. + homepage: https://www.gnu.org/software/gzip/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information + - rawfile: + type: file + description: | + Bruker Raw file archived using tar + pattern: "*.{d.tar,.tar,.gz,.d.tar.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'MD5', enzyme:trypsin ]
    - dotd:
        type: path
        description: Raw Bruker .d file
        pattern: "*.d"
    - log:
        type: file
        description: log file
        pattern: "*.log"
    - version:
        type: file
        description: File containing software version
        pattern: "versions.yml"
authors:
    - "@jspaezp"

diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf
index 05a545f1..f28545e3 100644
--- a/modules/local/diann_preliminary_analysis/main.nf
+++ b/modules/local/diann_preliminary_analysis/main.nf
@@ -20,14 +20,25 @@ process DIANN_PRELIMINARY_ANALYSIS {

     script:
     def args = task.ext.args ?: ''
-    mass_acc_ms1 = meta.precursor_mass_tolerance_unit == "ppm" ? meta.precursor_mass_tolerance : 5
-    mass_acc_ms2 = meta.fragment_mass_tolerance_unit == "ppm" ? meta.fragment_mass_tolerance : 13
-
-    mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1"
-    scan_window = params.scan_window_automatic ? "--individual-windows" : "--window $params.scan_window"
-    time_corr_only = params.time_corr_only ? "--time-corr-only" : ""
+    // I am using the ["key"] syntax here, since the preprocessed meta
+    // was evaluating to null when using the dot notation.
+    mass_acc_ms1 = meta['precursormasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['precursormasstolerance'] : 5
+    mass_acc_ms2 = meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['fragmentmasstolerance'] : 13
+
+    if (params.mass_acc_automatic) {
+        mass_acc = '--quick-mass-acc --individual-mass-acc'
+    } else {
+        mass_acc = '--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1'
+    }
+    scan_window = params.scan_window_automatic ? '--individual-windows' : '--window $params.scan_window'
+    time_corr_only = params.time_corr_only ? '--time-corr-only' : ''

     """
+    # Precursor Tolerance value was: ${meta['precursormasstolerance']}
+    # Fragment Tolerance value was: ${meta['fragmentmasstolerance']}
+    # Precursor Tolerance unit was: ${meta['precursormasstoleranceunit']}
+    # Fragment Tolerance unit was: ${meta['fragmentmasstoleranceunit']}
+
     diann --lib ${predict_tsv} \\
         --f ${mzML} \\
         --threads ${task.cpus} \\
diff --git a/modules/local/diannsummary/main.nf b/modules/local/diannsummary/main.nf
index 1b1871e8..f3f40877 100644
--- a/modules/local/diannsummary/main.nf
+++ b/modules/local/diannsummary/main.nf
@@ -31,8 +31,8 @@ process DIANNSUMMARY {

     script:
     def args = task.ext.args ?: ''
-    mass_acc_ms1 = meta.precursor_mass_tolerance_unit == "ppm" ? meta.precursor_mass_tolerance : 5
-    mass_acc_ms2 = meta.fragment_mass_tolerance_unit == "ppm" ? meta.fragment_mass_tolerance : 13
+    mass_acc_ms1 = meta["precursormasstoleranceunit"].toLowerCase().endsWith("ppm") ? meta["precursormasstolerance"] : 5
+    mass_acc_ms2 = meta["fragmentmasstoleranceunit"].toLowerCase().endsWith("ppm") ? meta["fragmentmasstolerance"] : 13

     mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1"
     scan_window = params.scan_window_automatic ? 
"--individual-windows" : "--window $params.scan_window" diff --git a/modules/local/dotd_to_mqc/main.nf b/modules/local/dotd_to_mqc/main.nf new file mode 100644 index 00000000..8cfd9852 --- /dev/null +++ b/modules/local/dotd_to_mqc/main.nf @@ -0,0 +1,30 @@ +process DOTD2MQC { + tag "$meta.mzml_id" + label 'process_medium' + label 'process_single' + + conda "base::python=3.10" + container "continuumio/miniconda3:23.5.2-0-alpine" + + input: + tuple val(meta), path(dot_d_file) + + output: + tuple path("dotd_mqc.yml"), path("*.tsv"), emit: dotd_mqc_data + path "versions.yml", emit: version + path "*.log", emit: log + + script: + def prefix = task.ext.prefix ?: "${meta.mzml_id}" + + """ + dotd_2_mqc.py "${dot_d_file}" \${PWD} \\ + 2>&1 | tee dotd_2_mqc_${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dotd_2_mqc: \$(dotd_2_mqc.py --version | grep -oE "\\d\\.\\d\\.\\d") + dotd_2_mqc_python: \$(python --version | grep -oE "\\d\\.\\d\\.\\d") + END_VERSIONS + """ +} diff --git a/modules/local/dotd_to_mqc/meta.yml b/modules/local/dotd_to_mqc/meta.yml new file mode 100644 index 00000000..e69de29b diff --git a/modules/local/generate_diann_cfg/main.nf b/modules/local/generate_diann_cfg/main.nf index 1721bc36..bce5b516 100644 --- a/modules/local/generate_diann_cfg/main.nf +++ b/modules/local/generate_diann_cfg/main.nf @@ -2,20 +2,20 @@ process GENERATE_DIANN_CFG { tag "$meta.experiment_id" label 'process_low' - conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.22" + conda 'conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.22' if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.22--pyhdfd78af_0" + container 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.22--pyhdfd78af_0' } else { - container "quay.io/biocontainers/sdrf-pipelines:0.0.22--pyhdfd78af_0" + container 'quay.io/biocontainers/sdrf-pipelines:0.0.22--pyhdfd78af_0' } input: val(meta) output: - path "diann_config.cfg", emit: diann_cfg - path "versions.yml", emit: version - path "*.log" + path 'diann_config.cfg', emit: diann_cfg + path 'versions.yml', emit: version + path '*.log' script: def args = task.ext.args ?: '' diff --git a/modules/local/individual_final_analysis/main.nf b/modules/local/individual_final_analysis/main.nf index 0cdf1789..3c8abf1d 100644 --- a/modules/local/individual_final_analysis/main.nf +++ b/modules/local/individual_final_analysis/main.nf @@ -19,8 +19,8 @@ process INDIVIDUAL_FINAL_ANALYSIS { script: def args = task.ext.args ?: '' - mass_acc_ms1 = meta.precursor_mass_tolerance_unit == "ppm" ? meta.precursor_mass_tolerance : 5 - mass_acc_ms2 = meta.fragment_mass_tolerance_unit == "ppm" ? meta.fragment_mass_tolerance : 13 + mass_acc_ms1 = meta["precursormasstoleranceunit"].toLowerCase().endsWith("ppm") ? meta["precursormasstolerance"] : 5 + mass_acc_ms2 = meta["fragmentmasstoleranceunit"].toLowerCase().endsWith("ppm") ? meta["fragmentmasstolerance"] : 13 scan_window = params.scan_window if (params.mass_acc_automatic | params.scan_window_automatic){ @@ -30,6 +30,8 @@ process INDIVIDUAL_FINAL_ANALYSIS { } """ + # Question: why is this using echo? wouldnt just the variable replacement do the same? 
+ diann --lib ${library} \\ --f ${mzML} \\ --fasta ${fasta} \\ diff --git a/modules/local/tdf2mzml/main.nf b/modules/local/tdf2mzml/main.nf index 56e173ad..53ec2852 100644 --- a/modules/local/tdf2mzml/main.nf +++ b/modules/local/tdf2mzml/main.nf @@ -45,15 +45,10 @@ process TDF2MZML { def prefix = task.ext.prefix ?: "${meta.mzml_id}" """ - tar --version - echo "Unpacking..." | tee --append ${rawfile.baseName}_conversion.log - tar -xvf ${rawfile} 2>&1 | tee --append ${rawfile.baseName}_conversion.log echo "Converting..." | tee --append ${rawfile.baseName}_conversion.log tdf2mzml.py -i *.d 2>&1 | tee --append ${rawfile.baseName}_conversion.log - echo "Compressing..." | tee --append ${rawfile.baseName}_conversion.log mv *.mzml ${file(rawfile.baseName).baseName}.mzML mv *.d ${file(rawfile.baseName).baseName}.d - # gzip ${file(rawfile.baseName).baseName}.mzML cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index a1b4d0a8..d8db73f5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -147,6 +147,9 @@ params { add_triqler_output = false quantify_decoys = false + // Bruker data + convert_dotd = false + // DIA-NN diann_debug = 3 scan_window = 8 diff --git a/nextflow_schema.json b/nextflow_schema.json index 5a900c52..3ad8eab0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -144,6 +144,12 @@ "description": "Which MS levels to pick as comma separated list. Leave empty for auto-detection.", "fa_icon": "fas fa-font", "help_text": "Which MS levels to pick as comma separated list, e.g. `--peakpicking_ms_levels 1,2`. Leave empty for auto-detection." + }, + "convert_dotd": { + "type": "boolean", + "description": "Convert bruker .d files to mzML", + "fa_icon": "far fa-check-square", + "help_text": "Whether to convert raw .d bruker files to .mzML" } }, "fa_icon": "far fa-chart-bar" diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf index 46de43da..411b0712 100644 --- a/subworkflows/local/file_preparation.nf +++ b/subworkflows/local/file_preparation.nf @@ -3,30 +3,45 @@ // include { THERMORAWFILEPARSER } from '../../modules/local/thermorawfileparser/main' -include { TDF2MZML } from '../../modules/local/tdf2mzml/main' +include { TDF2MZML } from '../../modules/local/tdf2mzml/main' +include { DECOMPRESS } from '../../modules/local/decompress_dotd/main' +include { DOTD2MQC } from '../../modules/local/dotd_to_mqc/main' include { MZMLINDEXING } from '../../modules/local/openms/mzmlindexing/main' include { MZMLSTATISTICS } from '../../modules/local/mzmlstatistics/main' include { OPENMSPEAKPICKER } from '../../modules/local/openms/openmspeakpicker/main' workflow FILE_PREPARATION { take: - ch_mzmls // channel: [ val(meta), raw/mzml/d.tar ] + ch_rawfiles // channel: [ val(meta), raw/mzml/d.tar ] main: ch_versions = Channel.empty() ch_results = Channel.empty() ch_statistics = Channel.empty() + ch_mqc_data = Channel.empty() + + // Divide the compressed files + ch_rawfiles + .branch { + dottar: WorkflowQuantms.hasExtension(it[1], '.tar') + dotgz: WorkflowQuantms.hasExtension(it[1], '.tar') + gz: WorkflowQuantms.hasExtension(it[1], '.gz') + uncompressed: true + }.set { ch_branched_input } + + compressed_files = ch_branched_input.dottar.mix(ch_branched_input.dotgz, ch_branched_input.gz) + DECOMPRESS(compressed_files) + ch_versions = ch_versions.mix(DECOMPRESS.out.version) + ch_rawfiles = ch_branched_input.uncompressed.mix(DECOMPRESS.out.decompressed_files) // // Divide mzml files - // - ch_mzmls + ch_rawfiles 
.branch {
-            raw: WorkflowQuantms.hasExtension(it[1], 'raw')
-            mzML: WorkflowQuantms.hasExtension(it[1], 'mzML')
-            dotD: WorkflowQuantms.hasExtension(it[1], '.d.tar')
-        }
-        .set { ch_branched_input }
+            raw: WorkflowQuantms.hasExtension(it[1], '.raw')
+            mzML: WorkflowQuantms.hasExtension(it[1], '.mzML')
+            dotd: WorkflowQuantms.hasExtension(it[1], '.d')
+        }.set { ch_branched_input }

     //TODO we could also check for outdated mzML versions and try to update them
     ch_branched_input.mzML
@@ -63,10 +78,20 @@ workflow FILE_PREPARATION {
     ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle }

-    TDF2MZML( ch_branched_input.dotD )
-    ch_versions = ch_versions.mix(TDF2MZML.out.version)
-    ch_results = indexed_mzml_bundle.mix(TDF2MZML.out.dotd_files)
-    indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted)
+    // Extract qc data from .d files
+    DOTD2MQC( ch_branched_input.dotd )
+    ch_mqc_data = ch_mqc_data.mix(DOTD2MQC.out.dotd_mqc_data.map{ it -> it[1] }.collect())
+    ch_versions = ch_versions.mix(DOTD2MQC.out.version)
+
+    // Convert .d files to mzML
+    if (params.convert_dotd) {
+        TDF2MZML( ch_branched_input.dotd )
+        ch_versions = ch_versions.mix(TDF2MZML.out.version)
+        ch_results = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted)
+        // indexed_mzml_bundle = indexed_mzml_bundle.mix(TDF2MZML.out.mzmls_converted)
+    } else {
+        ch_results = indexed_mzml_bundle.mix(ch_branched_input.dotd)
+    }

     MZMLSTATISTICS( indexed_mzml_bundle )
     ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.mzml_statistics.collect())
@@ -82,9 +107,9 @@ workflow FILE_PREPARATION {
         ch_results = OPENMSPEAKPICKER.out.mzmls_picked
     }

-
     emit:
     results = ch_results // channel: [val(mzml_id), indexedmzml|.d.tar]
     statistics = ch_statistics // channel: [ *_mzml_info.tsv ]
+    mqc_custom_data = ch_mqc_data // channel: [ *.yaml, *.tsv ]
     version = ch_versions // channel: [ *.version.txt ]
 }
diff --git a/workflows/dia.nf b/workflows/dia.nf
index 31198c74..f6b5eeab 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -20,7 +20,6 @@ include { DIANNSUMMARY } from '../modules/local/diannsummary/m
 //
 // SUBWORKFLOWS: Consisting of a mix of local and nf-core/modules
 //
-
 /*
 ========================================================================================
     RUN MAIN WORKFLOW
@@ -39,15 +38,16 @@ workflow DIA {
     main:

     ch_software_versions = Channel.empty()
-    Channel.fromPath(params.database).set{ ch_searchdb }
+    Channel.fromPath(params.database).set { ch_searchdb }

     ch_file_preparation_results.multiMap {
-                                meta: preprocessed_meta(it[0])
-                                ms_file: it[1]
-                                }
-                                .set { ch_result }
+        result ->
+            meta: preprocessed_meta(result[0])
+            ms_file:result[1]
+    }
+    .set { ch_result }

-    meta = ch_result.meta.unique {it[0]}
+    meta = ch_result.meta.unique { it[0] }

     DIANNCFG(meta)
     ch_software_versions = ch_software_versions.mix(DIANNCFG.out.version.ifEmpty(null))
@@ -55,14 +55,13 @@ workflow DIA {
     //
     // MODULE: SILICOLIBRARYGENERATION
     //
-    if (!params.diann_speclib) {
+    if (params.diann_speclib) {
+        speclib = Channel.fromPath(params.diann_speclib)
+    } else {
         SILICOLIBRARYGENERATION(ch_searchdb, DIANNCFG.out.diann_cfg)
         speclib = SILICOLIBRARYGENERATION.out.predict_speclib
-    } else {
-        speclib = Channel.fromPath(params.diann_speclib)
     }

-
     //
     // MODULE: DIANN_PRELIMINARY_ANALYSIS
     //
@@ -72,23 +71,36 @@ workflow DIA {
     //
     // MODULE: ASSEMBLE_EMPIRICAL_LIBRARY
     //
-    ASSEMBLE_EMPIRICAL_LIBRARY(ch_result.ms_file.collect(),
-        meta,
-        DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
-        speclib
-    )
+    // Order matters in DIANN; this should be sorted for 
reproducible results.
+    ASSEMBLE_EMPIRICAL_LIBRARY(
+        ch_result.ms_file.collect(),
+        meta,
+        DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
+        speclib
+    )
     ch_software_versions = ch_software_versions.mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.version.ifEmpty(null))

     //
     // MODULE: INDIVIDUAL_FINAL_ANALYSIS
     //
-    INDIVIDUAL_FINAL_ANALYSIS(ch_file_preparation_results.combine(ch_searchdb).combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log).combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library))
+    INDIVIDUAL_FINAL_ANALYSIS(
+        ch_file_preparation_results
+            .combine(ch_searchdb)
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log)
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
+    )
     ch_software_versions = ch_software_versions.mix(INDIVIDUAL_FINAL_ANALYSIS.out.version.ifEmpty(null))

     //
     // MODULE: DIANNSUMMARY
     //
-    ms_file_names = ch_result.ms_file.map{ msfile -> msfile.getName() }.collect()
+    // Order matters in DIANN; this should be sorted for reproducible results.
+    // NOTE: Here I am getting the names of the ms files, not their paths,
+    // since the next step only needs the names (it uses the cached .quant files).
+    ch_result
+        .ms_file.map { msfile -> msfile.getName() }
+        .collect()
+        .set { ms_file_names }
     DIANNSUMMARY(ms_file_names, meta, ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library,
        INDIVIDUAL_FINAL_ANALYSIS.out.diann_quant.collect(), ch_searchdb)
     ch_software_versions = ch_software_versions.mix(DIANNSUMMARY.out.version.ifEmpty(null))
@@ -96,14 +108,20 @@ workflow DIA {
     //
     // MODULE: DIANNCONVERT
     //
-    DIANNCONVERT(DIANNSUMMARY.out.main_report, ch_expdesign, DIANNSUMMARY.out.pg_matrix, DIANNSUMMARY.out.pr_matrix, ch_mzml_info,
-        meta, ch_searchdb, DIANNSUMMARY.out.version)
+    DIANNCONVERT(
+        DIANNSUMMARY.out.main_report, ch_expdesign,
+        DIANNSUMMARY.out.pg_matrix,
+        DIANNSUMMARY.out.pr_matrix, ch_mzml_info,
+        meta,
+        ch_searchdb,
+        DIANNSUMMARY.out.version
+    )
     ch_software_versions = ch_software_versions.mix(DIANNCONVERT.out.version.ifEmpty(null))

     //
     // MODULE: MSSTATS
     ch_msstats_out = Channel.empty()
-    if(!params.skip_post_msstats){
+    if (!params.skip_post_msstats) {
         MSSTATS(DIANNCONVERT.out.out_msstats)
         ch_msstats_out = MSSTATS.out.msstats_csv
         ch_software_versions = ch_software_versions.mix(MSSTATS.out.version.ifEmpty(null))
@@ -115,24 +133,22 @@ workflow DIA {
     msstats_in = DIANNCONVERT.out.out_msstats
     out_triqler = DIANNCONVERT.out.out_triqler
     msstats_out = ch_msstats_out
-
 }

-
 // remove meta.id to make sure cache identical HashCode
-def preprocessed_meta(LinkedHashMap meta){
+def preprocessed_meta(LinkedHashMap meta) {
     def parameters = [:]
-    parameters["experiment_id"] = meta.experiment_id
-    parameters["acquisition_method"] = meta.acquisition_method
-    parameters["dissociationmethod"] = meta.dissociationmethod
-    parameters["labelling_type"] = meta.labelling_type
-    parameters["fixedmodifications"] = meta.fixedmodifications
-    parameters["variablemodifications"] = meta.variablemodifications
-    parameters["precursormasstolerance"] = meta.precursormasstolerance
-    parameters["precursormasstoleranceunit"] = meta.precursormasstoleranceunit
-    parameters["fragmentmasstolerance"] = meta.fragmentmasstolerance
-    parameters["fragmentmasstoleranceunit"] = meta.fragmentmasstoleranceunit
-    parameters["enzyme"] = meta.enzyme
+    parameters['experiment_id'] = meta.experiment_id
+    parameters['acquisition_method'] = meta.acquisition_method
+    parameters['dissociationmethod'] = meta.dissociationmethod
+    parameters['labelling_type'] = meta.labelling_type
+    parameters['fixedmodifications'] = meta.fixedmodifications
+    
parameters['variablemodifications'] = meta.variablemodifications + parameters['precursormasstolerance'] = meta.precursormasstolerance + parameters['precursormasstoleranceunit'] = meta.precursormasstoleranceunit + parameters['fragmentmasstolerance'] = meta.fragmentmasstolerance + parameters['fragmentmasstoleranceunit'] = meta.fragmentmasstoleranceunit + parameters['enzyme'] = meta.enzyme return parameters } diff --git a/workflows/quantms.nf b/workflows/quantms.nf index b9c51946..04705ab1 100644 --- a/workflows/quantms.nf +++ b/workflows/quantms.nf @@ -180,6 +180,7 @@ workflow QUANTMS { ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) + ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.mqc_custom_data) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png") From f4d8cbe85664a8462e78cd6a941ef2749d88148f Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 17 Aug 2023 05:28:32 -0700 Subject: [PATCH 032/113] incorporated code review notes --- CHANGELOG.md | 4 ++++ bin/diann_convert.py | 24 +------------------ .../local/diann_preliminary_analysis/main.nf | 10 ++++---- modules/local/pmultiqc/main.nf | 6 +---- modules/local/sdrfparsing/main.nf | 4 ++++ pyproject.toml | 2 +- 6 files changed, 16 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b478024d..4abac9ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#275 BigBio](https://github.com/bigbio/quantms/pull/275) Added support for bruker data. And speed-up to DIA-NN pipeline. + ### `Changed` ### `Fixed` +- Fixed bug where modification masses were not calculated correctly in DIA-NN conversion. + ### `Dependencies` ### `Parameters` diff --git a/bin/diann_convert.py b/bin/diann_convert.py index 84269b3c..eaa31f52 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -597,8 +597,7 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): protein_details_df = ( protein_details_df.drop("accession", axis=1).join(prh_series).reset_index().drop(columns="index") ) - # Q: how is the next line different from `df.loc[:, "col"] = 'protein_details'` ?? - protein_details_df.loc[:, "opt_global_result_type"] = protein_details_df.apply(lambda x: "protein_details", axis=1) + protein_details_df.loc[:, "col"] = "protein_details" # protein_details_df = protein_details_df[-protein_details_df["accession"].str.contains("-")] out_mztab_PRH = pd.concat([out_mztab_PRH, protein_details_df]).reset_index(drop=True) @@ -1122,27 +1121,6 @@ def find_modification(peptide): return original_mods -def calculate_mz(seq, charge): - """ - Calculate the precursor m/z based on the peptide sequence and charge state. - - :param seq: Peptide sequence - :type seq: str - :param charge: charge state - :type charge: int - :return: - """ - # Q: is this faster if we make it a set? and maybe make it a global variable? - ref = "ARNDBCEQZGHILKMFPSTWYV" - - # Q: Does this mean that all modified peptides will have a wrong m/z? 
- seq = "".join([i for i in seq if i in ref]) - if charge == "": - return None - else: - return AASequence.fromString(seq).getMZ(int(charge)) - - def name_mapper_builder(subname_mapper): """Returns a function that renames the columns of the grouped table to match the ones in the final table. diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf index f28545e3..3e62d62d 100644 --- a/modules/local/diann_preliminary_analysis/main.nf +++ b/modules/local/diann_preliminary_analysis/main.nf @@ -1,5 +1,5 @@ process DIANN_PRELIMINARY_ANALYSIS { - tag "$mzML.baseName" + tag "$ms_file.baseName" label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,7 +7,7 @@ process DIANN_PRELIMINARY_ANALYSIS { 'biocontainers/diann:v1.8.1_cv1' }" input: - tuple val(meta), path(mzML), path(predict_tsv) + tuple val(meta), path(ms_file), path(predict_library) output: path "*.quant", emit: diann_quant @@ -39,8 +39,8 @@ process DIANN_PRELIMINARY_ANALYSIS { # Precursor Tolerance unit was: ${meta['precursormasstoleranceunit']} # Fragment Tolerance unit was: ${meta['fragmentmasstoleranceunit']} - diann --lib ${predict_tsv} \\ - --f ${mzML} \\ + diann --lib ${predict_library} \\ + --f ${ms_file} \\ --threads ${task.cpus} \\ --verbose $params.diann_debug \\ ${scan_window} \\ @@ -50,7 +50,7 @@ process DIANN_PRELIMINARY_ANALYSIS { ${mass_acc} \\ ${time_corr_only} \\ $args \\ - 2>&1 | tee ${mzML.baseName}_diann.log + 2>&1 | tee ${ms_file.baseName}_diann.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index a17b9281..a1cc3359 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -25,13 +25,9 @@ process PMULTIQC { def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" """ - # TODO remove the next line, it is only for debugging + # leaving here to ease debugging ls -lcth * - # Current hack to attempt matching file stems and not file extensions - # sed -i -e "s/((.d.tar)|(.d)|(.mzML)|(.mzml))\\t/\\t/g" - sed -i -e "s/.tar\\t/\\t/g" results/*openms_design.tsv - multiqc \\ -f \\ --config ./results/multiqc_config.yml \\ diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index aec416aa..f4795f00 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -39,6 +39,10 @@ process SDRFPARSING { # remove it here than doing the forensic tracking back of the file. sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_openms_design.tsv + # TODO: since I added support for .gz ... how are we aliasing? + # if someone packs a .d in a .gz (not .d.gz or .d.tar.gz), how should we + # know what extension to keep? + # I am almost sure these do need to be as they exist in the file system # before execution. # sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_config.tsv diff --git a/pyproject.toml b/pyproject.toml index 20551e61..814dd46f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,4 +10,4 @@ known_first_party = ["nf_core"] multi_line_output = 3 [tool.ruff] -line-length = 120 \ No newline at end of file +line-length = 120 From 062e6ba37a50afe3bd712bb7ac2a10fe1e2c4ad7 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 17 Aug 2023 05:36:11 -0700 Subject: [PATCH 033/113] minor fix on nf-core linting --- modules/local/preprocess_expdesign.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/preprocess_expdesign.nf b/modules/local/preprocess_expdesign.nf index 05fb4a55..cc50e98d 100644 --- a/modules/local/preprocess_expdesign.nf +++ b/modules/local/preprocess_expdesign.nf @@ -4,7 +4,6 @@ // accept different file endings already? process PREPROCESS_EXPDESIGN { tag "$design.Name" - label 'process_low' label 'process_single' conda "bioconda::sdrf-pipelines=0.0.22" From 6c933e480341c4a57f624a25f26c5eeb33aff157 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 31 Aug 2023 23:00:18 -0700 Subject: [PATCH 034/113] whitespace related linting --- modules/local/decompress_dotd/main.nf | 28 +++++++++++++------------- modules/local/decompress_dotd/meta.yml | 5 ++--- modules/local/sdrfparsing/main.nf | 4 ++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/modules/local/decompress_dotd/main.nf b/modules/local/decompress_dotd/main.nf index 8b01fb43..eb5fcfb8 100644 --- a/modules/local/decompress_dotd/main.nf +++ b/modules/local/decompress_dotd/main.nf @@ -38,20 +38,20 @@ process DECOMPRESS { """ function extract { - if [ -z "\$1" ]; then - echo "Usage: extract ." - else - if [ -f \$1 ]; then - case \$1 in - *.tar.gz) tar xvzf \$1 ;; - *.gz) gunzip \$1 ;; - *.tar) tar xvf \$1 ;; - *) echo "extract: '\$1' - unknown archive method" ;; - esac - else - echo "\$1 - file does not exist" - fi - fi + if [ -z "\$1" ]; then + echo "Usage: extract ." + else + if [ -f \$1 ]; then + case \$1 in + *.tar.gz) tar xvzf \$1 ;; + *.gz) gunzip \$1 ;; + *.tar) tar xvf \$1 ;; + *) echo "extract: '\$1' - unknown archive method" ;; + esac + else + echo "\$1 - file does not exist" + fi + fi } tar --help 2>&1 | tee -a ${prefix}_decompression.log diff --git a/modules/local/decompress_dotd/meta.yml b/modules/local/decompress_dotd/meta.yml index 021c09a7..b17737a2 100644 --- a/modules/local/decompress_dotd/meta.yml +++ b/modules/local/decompress_dotd/meta.yml @@ -7,11 +7,11 @@ keywords: tools: - tar: description: | - Generates and extracts archives. + Generates and extracts archives. homepage: https://www.gnu.org/software/tar/ - gunzip: description: | - Decompresses using zlib. + Decompresses using zlib. homepage: https://www.gnu.org/software/gzip/ input: - meta: @@ -43,4 +43,3 @@ output: pattern: "versions.yml" authors: - "@jspaezp" - diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index f4795f00..2ca65a2f 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -38,11 +38,11 @@ process SDRFPARSING { # all logs from the tool match only the .d suffix. so it is easier to # remove it here than doing the forensic tracking back of the file. sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_openms_design.tsv - + # TODO: since I added support for .gz ... how are we aliasing? # if someone packs a .d in a .gz (not .d.gz or .d.tar.gz), how should we # know what extension to keep? - + # I am almost sure these do need to be as they exist in the file system # before execution. # sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_config.tsv From 282b9aaeb8313a283456f876172a00fe9ed28c54 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 31 Aug 2023 23:02:36 -0700 Subject: [PATCH 035/113] prettier autofix of quotes --- assets/multiqc_config.yml | 66 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index bc756b51..c22bafb0 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -19,42 +19,42 @@ custom_logo_url: "https://github.com/bigbio/quantms" custom_logo_title: "quantms" custom_data: - total_ion_chromatograms: - file_format: 'tsv' - section_name: 'MS1 TIC' - description: 'MS1 total ion chromatograms extracted from the .d files' - plot_type: 'linegraph' - pconfig: - id: 'ms1_tic' - title: 'MS1 TIC' - ylab: 'Ion Count' - ymin: 0 - base_peak_chromatograms: - file_format: 'tsv' - section_name: 'MS1 BPC' - description: 'MS1 base peak chromatograms extracted from the .d files' - plot_type: 'linegraph' - pconfig: - id: 'ms1_bpc' - title: 'MS1 BPC' - ylab: 'Ion Count' - ymin: 0 - number_of_peaks: - file_format: 'tsv' - section_name: 'MS1 Peaks' - description: 'MS1 Peaks from the .d files' - plot_type: 'linegraph' - pconfig: - id: 'ms1_peaks' - title: 'MS1 Peaks' - ylab: 'Peak Count' - ymin: 0 + total_ion_chromatograms: + file_format: "tsv" + section_name: "MS1 TIC" + description: "MS1 total ion chromatograms extracted from the .d files" + plot_type: "linegraph" + pconfig: + id: "ms1_tic" + title: "MS1 TIC" + ylab: "Ion Count" + ymin: 0 + base_peak_chromatograms: + file_format: "tsv" + section_name: "MS1 BPC" + description: "MS1 base peak chromatograms extracted from the .d files" + plot_type: "linegraph" + pconfig: + id: "ms1_bpc" + title: "MS1 BPC" + ylab: "Ion Count" + ymin: 0 + number_of_peaks: + file_format: "tsv" + section_name: "MS1 Peaks" + description: "MS1 Peaks from the .d files" + plot_type: "linegraph" + pconfig: + id: "ms1_peaks" + title: "MS1 Peaks" + ylab: "Peak Count" + ymin: 0 sp: total_ion_chromatograms: - fn: 'tic_*' + fn: "tic_*" base_peak_chromatograms: - fn: 'bpc_*' + fn: "bpc_*" number_of_peaks: - fn: 'ms1_peaks_*' + fn: "ms1_peaks_*" quantms/exp_design: fn: "*_design.tsv" From aa32722e62d4f7738cfd03a3f5a87d30487d8f59 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sat, 2 Sep 2023 23:54:48 -0700 Subject: [PATCH 036/113] Experimental/bruker agg metrics (#3) * more report info * bugfix on variable replacement * bugfix, unavailable variable * added debug info --- assets/multiqc_config.yml | 7 + bin/dotd_2_mqc.py | 176 +++++++++++++++--- modules/local/decompress_dotd/main.nf | 14 +- .../local/diann_preliminary_analysis/main.nf | 2 +- modules/local/dotd_to_mqc/main.nf | 43 ++++- modules/local/pmultiqc/main.nf | 17 ++ modules/local/sdrfparsing/main.nf | 2 +- subworkflows/local/file_preparation.nf | 15 +- 8 files changed, 230 insertions(+), 46 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index c22bafb0..d88cd2e4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -49,6 +49,11 @@ custom_data: title: "MS1 Peaks" ylab: "Peak Count" ymin: 0 + general_stats: + file_format: "tsv" + section_name: "General Stats" + description: "General stats from the .d files" + plot_type: "table" sp: total_ion_chromatograms: fn: "tic_*" @@ -56,5 +61,7 @@ sp: fn: "bpc_*" number_of_peaks: fn: "ms1_peaks_*" + general_stats: + fn: "general_stats.tsv" quantms/exp_design: fn: "*_design.tsv" diff --git a/bin/dotd_2_mqc.py b/bin/dotd_2_mqc.py index 011e302d..dc868b09 100755 --- a/bin/dotd_2_mqc.py +++ b/bin/dotd_2_mqc.py @@ -10,7 +10,14 @@ - dotd_mqc.yml Usage: - $ python dotd_2_mqc.py + $ python dotd_2_mqc.py single + $ python dotd_2_mqc.py single + $ python dotd_2_mqc.py aggregate + + # These last steps can also be + $ python dotd_2_mqc.py single + # If the input directory contains multiple .d files. + $ cd $ multiqc -c dotd_mqc.yml . """ @@ -22,10 +29,13 @@ from pathlib import Path # noqa: E402 from dataclasses import dataclass # noqa: E402 from logging import getLogger # noqa: E402 +import logging # noqa: E402 -VERSION = "0.0.1" +VERSION = "0.0.2" +logging.basicConfig(level=logging.DEBUG) logger = getLogger(__name__) +SECOND_RESOLUTION = 5 MQC_YML = """ custom_data: total_ion_chromatograms: @@ -58,6 +68,11 @@ title: 'MS1 Peaks' ylab: 'Peak Count' ymin: 0 + general_stats: + file_format: 'tsv' + section_name: 'General Stats' + description: 'General stats from the .d files' + plot_type: 'table' sp: total_ion_chromatograms: fn: 'tic_*' @@ -66,7 +81,7 @@ number_of_peaks: fn: 'ms1_peaks_*' general_stats: - fn: 'general_stats_*' + fn: 'general_stats.tsv' """ @@ -90,10 +105,14 @@ def ms1_tic(self) -> List[Tuple[float, float]]: Returns: List[Tuple[float, float]]: List of (time, intensity) tuples. """ - cmd = """ - SELECT CAST(Time AS INTEGER), AVG(SummedIntensities) + # Note that here I am using min and not mean for purely qc reasons. + # Since the diagnostic aspect here is mainly to see major fluctuations + # in the intensity, and usually these are scans with very low intensity + # due to bubbles or ionization issues, thus the mean would hide that. + cmd = f""" + SELECT MIN(Time), MIN(SummedIntensities) FROM frames WHERE MsMsType = '0' - GROUP BY CAST(Time AS INTEGER) + GROUP BY CAST(Time / {SECOND_RESOLUTION} AS INTEGER) ORDER BY Time """ conn = sqlite3.connect(self.sql_filepath) @@ -109,10 +128,10 @@ def ms1_bpc(self) -> List[Tuple[float, float]]: Returns: List[Tuple[float, float]]: List of (time, intensity) tuples. 
""" - cmd = """ - SELECT CAST(Time AS INTEGER), MAX(MaxIntensity) + cmd = f""" + SELECT MIN(Time), MAX(MaxIntensity) FROM frames WHERE MsMsType = '0' - GROUP BY CAST(Time AS INTEGER) + GROUP BY CAST(Time / {SECOND_RESOLUTION} AS INTEGER) ORDER BY Time """ conn = sqlite3.connect(self.sql_filepath) @@ -128,10 +147,10 @@ def ms1_peaks(self) -> List[Tuple[float, float]]: Returns: List[Tuple[float, float]]: List of (time, intensity) tuples. """ - cmd = """ - SELECT CAST(Time AS INTEGER), AVG(NumPeaks) + cmd = f""" + SELECT MIN(Time), AVG(NumPeaks) FROM frames WHERE MsMsType = '0' - GROUP BY CAST(Time AS INTEGER) + GROUP BY CAST(Time / {SECOND_RESOLUTION} AS INTEGER) ORDER BY Time """ conn = sqlite3.connect(self.sql_filepath) @@ -158,6 +177,48 @@ def get_acquisition_datetime(self) -> str: raise RuntimeError("More than one acquisition datetime found.") return out[0][0] + + def get_tot_current(self) -> float: + """Gets the total current from the ms1 scans. + + Returns + ------- + float + The total current. + """ + cmd = """ + SELECT SUM(CAST(SummedIntensities AS FLOAT)) + FROM frames WHERE MsMsType = '0' + """ + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + if not len(out) == 1: + raise RuntimeError("More than one total current found.") + + return out[0][0] + + def get_dia_scan_current(self) -> float: + """Gets the total current from the ms2 scans. + + Returns + ------- + float + The total current. + """ + cmd = """ + SELECT SUM(CAST(SummedIntensities AS FLOAT)) + FROM frames WHERE MsMsType = '9' + """ + conn = sqlite3.connect(self.sql_filepath) + c = conn.cursor() + out = c.execute(cmd).fetchall() + conn.close() + if not len(out) == 1: + raise RuntimeError("More than one total current found.") + + return out[0][0] def get_general_stats(self) -> dict: """Gets the general stats from the .d file. 
@@ -169,6 +230,8 @@ def get_general_stats(self) -> dict: """ out = { "AcquisitionDateTime": self.get_acquisition_datetime(), + "TotalCurrent": self.get_tot_current(), + "DIA_ScanCurrent": self.get_dia_scan_current(), } return out @@ -181,7 +244,6 @@ def write_tables(self, location): bpc = self.ms1_bpc npeaks = self.ms1_peaks general_stats = self.get_general_stats() - general_stats["TotCurrent"] = sum([i for t, i in tic]) tic_path = location / f"tic_{self.basename}.tsv" bpc_path = location / f"bpc_{self.basename}.tsv" @@ -209,21 +271,7 @@ def write_tables(self, location): f.write(f"{k}\t{v}\n") -if __name__ == "__main__": - parser = argparse.ArgumentParser(add_help=True, usage=GENERAL_HELP) - parser.add_argument("input", help="Input .d file or directory of .d files.") - parser.add_argument("output", help="Output directory.") - parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") - - args, unkargs = parser.parse_known_args() - - if unkargs: - print(f"Unknown arguments: {unkargs}") - raise RuntimeError("Unknown arguments.") - - input_path = Path(args.input) - output_path = Path(args.output) - +def main_single(input_path, output_path): if input_path.is_dir() and str(input_path).endswith(".d"): input_files = [input_path] elif input_path.is_dir(): @@ -240,3 +288,75 @@ def write_tables(self, location): logger.info(f"Writing {output_path / 'dotd_mqc.yml'}") with (output_path / "dotd_mqc.yml").open("w") as f: f.write(MQC_YML) + + if len(input_files) > 1: + logger.info("Writing aggregate general stats.") + main_aggregate(output_path, output_path) + + logger.info("Done.") + + +def main_aggregate(input_path, output_path): + # Find the general stats files + if not input_path.is_dir(): + logger.error(f"Input path {input_path} is not a directory.") + raise ValueError("Input path must be a directory.") + + general_stats_files = list(input_path.glob("general_stats_*.tsv")) + if not general_stats_files: + logger.error(f"No general stats files found in {input_path}.") + raise ValueError("No general stats files found.") + + # Merge them to a single table + # Effectively transposing the columns and adding column called file, + # which contains the file name from which the stats were acquired. 
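+    # For example (hypothetical stems and values, one stat column shown),
+    #   general_stats_runA.tsv -> AcquisitionDateTime <tab> 2023-08-06T06:23:19
+    #   general_stats_runB.tsv -> AcquisitionDateTime <tab> 2023-08-07T09:01:02
+    # merge into one table with header "file <tab> AcquisitionDateTime" and
+    # one row per run.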
+ logger.info("Merging general stats files.") + general_stats = [] + for f in general_stats_files: + curr_stats = {'file': f.stem.replace("general_stats_", "")} + with f.open("r") as fh: + for line in fh: + line = line.strip() + if not line: + continue + k, v = line.split("\t") + curr_stats[k] = v + + general_stats.append(curr_stats) + + # Write the general stats file + logger.info("Writing general stats file.") + with (output_path / "general_stats.tsv").open("w") as f: + f.write("\t".join(general_stats[0].keys()) + "\n") + for s in general_stats: + f.write("\t".join(s.values()) + "\n") + + +if __name__ == "__main__": + # create the top-level parser + parser = argparse.ArgumentParser(add_help=True, usage=GENERAL_HELP) + parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") + subparsers = parser.add_subparsers(required=True) + + # create the parser for the "single" command + parser_foo = subparsers.add_parser('single') + parser_foo.add_argument("input", help="Input .d file or directory of .d files.") + parser_foo.add_argument("output", help="Output directory.") + parser_foo.set_defaults(func=main_single) + + # create the parser for the "aggregate" command + parser_bar = subparsers.add_parser('aggregate') + parser_bar.add_argument("input", help="Directory that contains the general stats files to aggregate.") + parser_bar.add_argument("output", help="Output directory.") + parser_bar.set_defaults(func=main_aggregate) + + # parse the args and call whatever function was selected + args, unkargs = parser.parse_known_args() + if unkargs: + print(f"Unknown arguments: {unkargs}") + raise RuntimeError("Unknown arguments.") + + input_path = Path(args.input) + output_path = Path(args.output) + + args.func(input_path, output_path) diff --git a/modules/local/decompress_dotd/main.nf b/modules/local/decompress_dotd/main.nf index eb5fcfb8..3e091f39 100644 --- a/modules/local/decompress_dotd/main.nf +++ b/modules/local/decompress_dotd/main.nf @@ -42,14 +42,14 @@ process DECOMPRESS { echo "Usage: extract ." else if [ -f \$1 ]; then - case \$1 in - *.tar.gz) tar xvzf \$1 ;; - *.gz) gunzip \$1 ;; - *.tar) tar xvf \$1 ;; - *) echo "extract: '\$1' - unknown archive method" ;; - esac + case \$1 in + *.tar.gz) tar xvzf \$1 ;; + *.gz) gunzip \$1 ;; + *.tar) tar xvf \$1 ;; + *) echo "extract: '\$1' - unknown archive method" ;; + esac else - echo "\$1 - file does not exist" + echo "\$1 - file does not exist" fi fi } diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf index 3e62d62d..997d01a4 100644 --- a/modules/local/diann_preliminary_analysis/main.nf +++ b/modules/local/diann_preliminary_analysis/main.nf @@ -28,7 +28,7 @@ process DIANN_PRELIMINARY_ANALYSIS { if (params.mass_acc_automatic) { mass_acc = '--quick-mass-acc --individual-mass-acc' } else { - mass_acc = '--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1' + mass_acc = "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1" } scan_window = params.scan_window_automatic ? '--individual-windows' : '--window $params.scan_window' time_corr_only = params.time_corr_only ? 
'--time-corr-only' : '' diff --git a/modules/local/dotd_to_mqc/main.nf b/modules/local/dotd_to_mqc/main.nf index 8cfd9852..b2b464f8 100644 --- a/modules/local/dotd_to_mqc/main.nf +++ b/modules/local/dotd_to_mqc/main.nf @@ -1,16 +1,21 @@ -process DOTD2MQC { - tag "$meta.mzml_id" - label 'process_medium' +/* groovylint-disable DuplicateStringLiteral */ +process DOTD2MQC_INDIVIDUAL { + tag "$meta.experiment_id" label 'process_single' conda "base::python=3.10" container "continuumio/miniconda3:23.5.2-0-alpine" input: + // Note: This step can be optimized by staging only the + // .tdf file inside the .d directory. + // Thus reducing the data transfer of the rest of the .d + // directory. IN PARTICULAR the .tdf.bin tuple val(meta), path(dot_d_file) output: tuple path("dotd_mqc.yml"), path("*.tsv"), emit: dotd_mqc_data + path "general_stats*.tsv", emit: general_stats path "versions.yml", emit: version path "*.log", emit: log @@ -18,7 +23,7 @@ process DOTD2MQC { def prefix = task.ext.prefix ?: "${meta.mzml_id}" """ - dotd_2_mqc.py "${dot_d_file}" \${PWD} \\ + dotd_2_mqc.py single "${dot_d_file}" \${PWD} \\ 2>&1 | tee dotd_2_mqc_${prefix}.log cat <<-END_VERSIONS > versions.yml @@ -28,3 +33,33 @@ process DOTD2MQC { END_VERSIONS """ } + + +process DOTD2MQC_AGGREGATE { + label 'process_single' + + conda 'base::python=3.10' + container 'continuumio/miniconda3:23.5.2-0-alpine' + + input: + path '*' // tsv files from DOTD2MQC_INDIVIDUAL + + output: + path 'general_stats.tsv', emit: dotd_mqc_data + path 'versions.yml', emit: version + path '*.log', emit: log + + script: + """ + ls -lcth + + dotd_2_mqc.py aggregate \${PWD} \${PWD} \\ + 2>&1 | tee dotd_2_mqc_agg.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dotd_2_mqc: \$(dotd_2_mqc.py --version | grep -oE "\\d\\.\\d\\.\\d") + dotd_2_mqc_python: \$(python --version | grep -oE "\\d\\.\\d\\.\\d") + END_VERSIONS + """ +} diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index a1cc3359..c0e1fbee 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -25,9 +25,26 @@ process PMULTIQC { def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" """ + set -x + set -e + # leaving here to ease debugging ls -lcth * + echo ">>>>>>>>> Experimental Design <<<<<<<<<" + cat results/*openms_design.tsv + + # I attempted making this expression match prior + # to tabs but that does not seem to work (it might be a groovy escaping issue) + # and should be fixed when https://github.com/bigbio/pmultiqc/issues/80 + # gets solved. + # Current hack to attempt matching file stems and not file extensions + sed -i -E "s/((\\.tar)|(\\.gz)|(\\.tar\\.gz))//g" results/*openms_design.tsv + + echo ">>>>>>>>> Experimental Design <<<<<<<<<" + cat results/*openms_design.tsv + + echo ">>>>>>>>> Running Multiqc <<<<<<<<<" multiqc \\ -f \\ --config ./results/multiqc_config.yml \\ diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 2ca65a2f..1cf44078 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -30,7 +30,7 @@ process SDRFPARSING { # Context, without --raw, all file name extenssions are changed to mzML. 
# related: https://github.com/bigbio/sdrf-pipelines/issues/145
-    parse_sdrf convert-openms --raw -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log
+    parse_sdrf convert-openms ${args} --raw -t2 -l -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log

     mv openms.tsv ${sdrf.baseName}_config.tsv
     mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv
diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf
index 411b0712..7ba9e252 100644
--- a/subworkflows/local/file_preparation.nf
+++ b/subworkflows/local/file_preparation.nf
@@ -5,7 +5,8 @@

 include { THERMORAWFILEPARSER } from '../../modules/local/thermorawfileparser/main'
 include { TDF2MZML } from '../../modules/local/tdf2mzml/main'
 include { DECOMPRESS } from '../../modules/local/decompress_dotd/main'
-include { DOTD2MQC } from '../../modules/local/dotd_to_mqc/main'
+include { DOTD2MQC_INDIVIDUAL } from '../../modules/local/dotd_to_mqc/main'
+include { DOTD2MQC_AGGREGATE } from '../../modules/local/dotd_to_mqc/main'
 include { MZMLINDEXING } from '../../modules/local/openms/mzmlindexing/main'
 include { MZMLSTATISTICS } from '../../modules/local/mzmlstatistics/main'
 include { OPENMSPEAKPICKER } from '../../modules/local/openms/openmspeakpicker/main'
@@ -79,9 +80,13 @@ workflow FILE_PREPARATION {
     ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle }

     // Extract qc data from .d files
-    DOTD2MQC( ch_branched_input.dotd )
-    ch_mqc_data = ch_mqc_data.mix(DOTD2MQC.out.dotd_mqc_data.map{ it -> it[1] }.collect())
-    ch_versions = ch_versions.mix(DOTD2MQC.out.version)
+    DOTD2MQC_INDIVIDUAL(ch_branched_input.dotd)
+    // The map extracts the tsv files from the tuple; the other element is the yml config.
+    ch_mqc_data = ch_mqc_data.mix(DOTD2MQC_INDIVIDUAL.out.dotd_mqc_data.map{ it -> it[1] }.collect())
+    DOTD2MQC_AGGREGATE(DOTD2MQC_INDIVIDUAL.out.general_stats.collect())
+    ch_mqc_data = ch_mqc_data.mix(DOTD2MQC_AGGREGATE.out.dotd_mqc_data.collect())
+    ch_versions = ch_versions.mix(DOTD2MQC_INDIVIDUAL.out.version)
+    ch_versions = ch_versions.mix(DOTD2MQC_AGGREGATE.out.version)

     // Convert .d files to mzML
     if (params.convert_dotd) {
@@ -110,6 +115,6 @@ workflow FILE_PREPARATION {
     emit:
     results = ch_results // channel: [val(mzml_id), indexedmzml|.d.tar]
     statistics = ch_statistics // channel: [ *_mzml_info.tsv ]
-    mqc_custom_data = ch_mqc_data // channel: [ *.yaml, *.tsv ]
+    mqc_custom_data = ch_mqc_data // channel: [ *.tsv ]
     version = ch_versions // channel: [ *.version.txt ]
 }
From 0da9965d56546bd9a235034529ae5fecdb578429 Mon Sep 17 00:00:00 2001
From: "J. 
Sebastian Paez" Date: Sun, 3 Sep 2023 00:11:29 -0700 Subject: [PATCH 037/113] Updating to upstream dev branch (#4) --- .editorconfig | 2 +- .github/CONTRIBUTING.md | 1 - .github/ISSUE_TEMPLATE/bug_report.yml | 4 +- .github/workflows/awsfulltest.yml | 13 +- .github/workflows/awstest.yml | 13 +- .github/workflows/branch.yml | 2 +- .github/workflows/ci.yml | 2 +- .github/workflows/clean-up.yml | 24 + .github/workflows/linting.yml | 2 +- .gitpod.yml | 5 + .pre-commit-config.yaml | 5 + CITATIONS.md | 8 + README.md | 82 +-- assets/methods_description_template.yml | 12 +- assets/multiqc_config.yml | 4 +- assets/nf-core-quantms_logo_light.png | Bin 10743 -> 71567 bytes assets/slackreport.json | 2 +- bin/check_samplesheet.py | 22 + conf/base.config | 4 +- conf/test_dia.config | 1 - conf/test_full_dia.config | 1 - conf/test_full_lfq.config | 1 - conf/test_full_tmt.config | 1 - conf/test_lfq.config | 1 - conf/test_localize.config | 1 - conf/test_tmt.config | 1 - docs/usage.md | 130 ++--- lib/NfcoreSchema.groovy | 528 ------------------ lib/NfcoreTemplate.groovy | 2 +- lib/WorkflowMain.groovy | 58 +- lib/WorkflowQuantms.groovy | 49 +- main.nf | 16 + modules.json | 4 +- modules/local/openms/epifany/main.nf | 2 - modules/local/openms/proteomicslfq/main.nf | 2 +- .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 2 + modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 3 +- nextflow.config | 104 ++-- nextflow_schema.json | 48 +- tower.yml | 5 + workflows/quantms.nf | 29 +- 43 files changed, 372 insertions(+), 836 deletions(-) create mode 100644 .github/workflows/clean-up.yml create mode 100644 .pre-commit-config.yaml delete mode 100755 lib/NfcoreSchema.groovy create mode 100644 tower.yml diff --git a/.editorconfig b/.editorconfig index b78de6e6..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 14713dbf..357f3d98 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 4ef83298..5d2a613d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/quantms _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 5148f8dd..562af8fa 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -19,23 +19,24 @@ jobs: mode: ["lfq", "tmt", "dia"] steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/quantms/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/quantms/results-${{ github.sha }}/mode_${{ matrix.mode }}" } + profiles: test_${{ matrix.mode }} - profiles: test_${{ matrix.mode }},aws_tower - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 42958b76..eed8b846 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,21 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/quantms/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/quantms/results-test-${{ github.sha }}" } - profiles: test_tmt,aws_tower + profiles: test_tmt + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 94fa05b2..b4ff5e13 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/quantms' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/quantms ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/quantms ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e447bb4c..6ce3be2b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: matrix: # Nextflow versions NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" test_profile: ["test_lfq", "test_dia", "test_localize", "test_tmt"] exec_profile: ["docker", "conda"] diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 
0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 858d622e..888cb4bc 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.8" architecture: "x64" - name: Install dependencies diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c31cdb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/CITATIONS.md b/CITATIONS.md index 339d5c35..cd9ddbf5 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -47,8 +47,13 @@ > The M, KƤll L. Integrated Identification and Quantification Error Probabilities for Shotgun Proteomics. Mol Cell Proteomics. 2019 Mar;18(3):561-570. doi: 10.1074/mcp.RA118.001018. Epub 2018 Nov 27. PMID: 30482846; PMCID: PMC6398204. - [luciphor](https://pubmed.ncbi.nlm.nih.gov/23918812/) + > Fermin D, Walmsley SJ, Gingras AC, Choi H, Nesvizhskii AI. LuciPHOr: algorithm for phosphorylation site localization with false localization rate estimation using modified target-decoy approach. Mol Cell Proteomics. 2013 Nov;12(11):3409-19. doi: 10.1074/mcp.M113.028928. Epub 2013 Aug 5. PMID: 23918812; PMCID: PMC3820951. +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, KƤller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) @@ -65,5 +70,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. 
eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 44badebb..457c5b20 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,16 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.7754148-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7754148) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](https://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/quantms) -[![Get help on Slack](https://img.shields.io/badge/slack-nf--core%20%23quantms-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/quantms)[![Follow on Twitter](https://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](https://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23quantms-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/quantms)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction - - **nf-core/quantms** is a bioinformatics best-practice analysis pipeline for Quantitative Mass Spectrometry (MS). Currently, the workflow supports three major MS-based analytical methods: (i) Data dependant acquisition (DDA) label-free and Isobaric quantitation (e.g. TMT, iTRAQ); (ii) Data independent acquisition (DIA) label-free quantification (for details see our in-depth documentation on [quantms](https://quantms.readthedocs.io/en/latest/)).

@@ -22,17 +20,15 @@

 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!

-
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!

-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/quantms/results). This gives you a hint on which reports and file types are produced by the pipeline in a standard run. The automatic continuous integration tests evaluate different workflows, including the peptide identification, quantification for LFQ, LFQ-DIA, and TMT test datasets.
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/quantms/results). This gives you a hint on which reports and file types are produced by the pipeline in a standard run. The automatic continuous integration tests on every pull request evaluate different workflows, including peptide identification, quantification for LFQ, LFQ-DIA, and TMT test datasets.

 ## Pipeline summary

-
+**nf-core/quantms** allows users to perform analyses of three main types of analytical mass spectrometry-based quantitative methods: DDA-LFQ, DDA-ISO, DIA-LFQ. Each of these workflows shares some processes but also includes its own steps. In summary:

-The quantms allows uses to perform analysis in three main type of analytical MS-based quantitative methods: DDA-LFQ, DDA-ISO, DIA-LFQ. Each of these workflows share some processes but also includes their own steps. In summary:
-
-DDA-LFQ:
+### DDA-LFQ (data-dependent label-free quantification)

 1. RAW file conversion to mzML ([`thermorawfileparser`](https://github.com/compomics/ThermoRawFileParser))
 2. Peptide identification using [`comet`](https://uwpr.github.io/Comet/) and/or [`msgf+`](https://github.com/MSGFPlus/msgfplus)
@@ -44,7 +40,7 @@ DDA-LFQ:
 8. QC report generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)
 9. Normalization, imputation, significance testing with [`MSstats`](https://github.com/VitekLab/MSstats)

-DDA-ISO:
+### DDA-ISO (data-dependent quantification via isobaric labelling)

 1. RAW file conversion to mzML ([`thermorawfileparser`](https://github.com/compomics/ThermoRawFileParser))
 2. Peptide identification using [`comet`](https://uwpr.github.io/Comet/) and/or [`msgf+`](https://github.com/MSGFPlus/msgfplus)
@@ -57,14 +53,14 @@ DDA-ISO:
 9. QC report generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)
 10. Normalization, imputation, significance testing with [`MSstats`](https://github.com/VitekLab/MSstats)

-DIA-LFQ:
+### DIA-LFQ (data-independent label-free quantification)

 1. RAW file conversion to mzML ([`thermorawfileparser`](https://github.com/compomics/ThermoRawFileParser))
 2. DIA-NN analysis [`dia-nn`](https://github.com/vdemichev/DiaNN/)
 3. Generation of output files (msstats)
 4. QC reports generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)

-## Functionality overview
+### Functionality overview

 A graphical overview of suggested routes through the pipeline depending on context can be seen below.

@@ -72,41 +68,47 @@ A graphical overview of suggested routes through the pipeline depending on conte

 nf-core/quantms metro map
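Each of the three routes above can be exercised end-to-end with the bundled CI test profiles (the same `test_lfq`, `test_tmt` and `test_dia` profiles referenced in `.github/workflows/ci.yml` earlier in this patch); a minimal sketch, assuming a local Docker installation:

```bash
# Smoke-test one route per quantification mode; the test profiles configure
# small test datasets, so these runs stay lightweight.
nextflow run nf-core/quantms -profile test_lfq,docker --outdir results_lfq
nextflow run nf-core/quantms -profile test_tmt,docker --outdir results_tmt
nextflow run nf-core/quantms -profile test_dia,docker --outdir results_dia
```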
-## Quick Start +## Usage -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) +> **Note** +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +> with `-profile test` before running the workflow on actual data. -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +First, find or create a sample-to-data relationship file ([SDRF](https://github.com/bigbio/proteomics-sample-metadata)). +Have a look at public datasets that were already annotated [here](https://github.com/bigbio/proteomics-sample-metadata/tree/master/annotated-projects). +Those SDRFs should be ready for one-command re-analysis and you can just use the URL to the file on GitHub, +e.g., `https://raw.githubusercontent.com/bigbio/proteomics-sample-metadata/master/annotated-projects/PXD000396/PXD000396.sdrf.tsv`. +If you create your own, please adhere to the specifications and point the pipeline to your local folder or a remote location where you uploaded it to. -3. Download the pipeline and test it on a minimal dataset with a single command: +The second requirement is a protein sequence database. We suggest downloading a database for the organism(s)/proteins of interest from [Uniprot](https://www.uniprot.org/proteomes?query=*). - ```bash - nextflow run nf-core/quantms -profile test,YOURPROFILE --input project.sdrf.tsv --database protein.fasta --outdir - ``` +Now, you can run the pipeline using: - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +```bash +nextflow run nf-core/quantms \ + -profile \ + --input project.sdrf.tsv \ + --database database.fasta \ + --outdir +``` - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity` and are persistently observing issues downloading Singularity images directly due to timeout or network issues, then you can use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, you can use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. 
Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. - > - > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > * If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +> **Warning:** +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -4. Start running your own analysis! +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/quantms/usage) and the [parameter documentation](https://nf-co.re/quantms/parameters). - +## Additional documentation and tutorial - ```bash - nextflow run nf-core/quantms --input project.sdrf.tsv --database database.fasta --outdir -profile - ``` +The **nf-core/quantms** pipeline comes with a stand-alone [full documentation](https://quantms.readthedocs.io/en/latest/) including examples, benchmarks, and detailed explanation about the data analysis of proteomics data using quantms. -## Documentation +## Pipeline output -The nf-core/quantms pipeline comes with a stand-alone [full documentation](https://quantms.readthedocs.io/en/latest/) including examples, benchmarks, and detailed explanation about the data analysis of proteomics data using quantms. In addition, quickstart documentation of the pipeline can be found in: [usage](https://nf-co.re/quantms/usage), [parameters](https://nf-co.re/quantms/parameters) and [output](https://nf-co.re/quantms/output). +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/quantms/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/quantms/output). 
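To make the parameter-passing advice above concrete, a minimal `-params-file` launch might look as follows; `params.yml` is an illustrative file name, and the SDRF URL is the PXD000396 example quoted in the usage text:

```bash
# params.yml is a hypothetical example; any pipeline parameter that would
# otherwise be passed as --<name> on the command line can live here instead.
cat > params.yml <<'EOF'
input: https://raw.githubusercontent.com/bigbio/proteomics-sample-metadata/master/annotated-projects/PXD000396/PXD000396.sdrf.tsv
database: database.fasta
outdir: results
EOF

nextflow run nf-core/quantms -profile docker -params-file params.yml
```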
## Credits @@ -117,13 +119,11 @@ We thank the following people for their extensive assistance in the development - Timo Sachsenberg ([@timosachsenberg](https://github.com/timosachsenberg)) - Wang Hong ([@WangHong007](https://github.com/WangHong007)) - - ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). -For further information or help, don't hesitate to get in touch on the [Slack `#quantms` channel](https://nfcore.slack.com/channels/quantms) (you can join with [this invite](https://nf-co.re/join/slack)). In addition, users can get in touch using our [discussion forum](https://github.com/bigbio/quantms/discussions) +For further information or help, don't hesitate to get in touch on the [Slack `#quantms` channel](https://nfcore.slack.com/channels/quantms) (you can join with [this invite](https://nf-co.re/join/slack)). ## Citations diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 301c9cbd..54ba5052 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/quantms Methods Description" section_href: "https://github.com/nf-core/quantms" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |
   <h4>Methods</h4>
-  <p>Data was processed using nf-core/quantms v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels et al., 2020</a>).</p>
+  <p>Data was processed using nf-core/quantms v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels et al., 2020</a>), utilising reproducible software environments from the Bioconda (<a href="https://doi.org/10.1038/s41592-018-0046-7">Grüning et al., 2018</a>) and Biocontainers (<a href="https://doi.org/10.1093/bioinformatics/btx192">da Veiga Leprevost et al., 2017</a>) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso et al., 2017</a>) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. <a href="https://doi.org/10.1038/nbt.3820">https://doi.org/10.1038/nbt.3820</a></li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. <a href="https://doi.org/10.1038/s41587-020-0439-x">https://doi.org/10.1038/s41587-020-0439-x</a></li>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: <a href="https://doi.org/10.1038/nbt.3820">10.1038/nbt.3820</a></li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: <a href="https://doi.org/10.1038/s41587-020-0439-x">10.1038/s41587-020-0439-x</a></li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: <a href="https://doi.org/10.1038/s41592-018-0046-7">10.1038/s41592-018-0046-7</a></li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: <a href="https://doi.org/10.1093/bioinformatics/btx192">10.1093/bioinformatics/btx192</a></li>
+    ${tool_bibliography}
   </ul>
   <h4>Notes:</h4>
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index d88cd2e4..77d56243 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the nf-core/quantms
+  This report has been generated by the nf-core/quantms
   analysis pipeline. For information about how to interpret these results, please see the
-  documentation.
+  documentation.
 report_section_order:
   pmultiqc:
     order: 1
diff --git a/assets/nf-core-quantms_logo_light.png b/assets/nf-core-quantms_logo_light.png
index 71f6dfa4f0652a4c52d52f5d89183bbc4156a7b0..030a38e8492ec6f8efc68aece1d500993e22f6ba 100644
GIT binary patch
literal 71567
(binary PNG payload omitted)
zm*=s*CuDXSD?QWiS+X)34n}-J=kQC4w>jqq{T?*NmZ+h=WI|dM_xc$G(S?bSase+R z!ukPjomDM$?fi8i8y%{quiu_4(}rqe(?^R`s-(U>%oKpyWKUXnvXZu;OmP3ul}v$W zcbepbvG_<||Jz;mQdnLYeYzMI5JFUt54O%0)JGwj#5KRL8%>N-^t5mA(a@~G}cQyRbdV65?bLxxdkAT zpV5x@-T#=VryEKLcvq6hBha?_`fvI2C7qAs?dFvZEXUGE&5}Qcg+8xtFDAyizi&r4@Hut_()w}2)rSzk) z8TEy_8JphADDI%Yq!b+EY{W)9BsW@c1d^paHDwUQb6boA+X|QVD~0EI&)gTA{bc8o zeCzqzd)WLlmkmQ~`2!y5jza5&HL>r{|3aNygBd5WH(Ji*Q_?T^cQ zU*qP6aqW8mam0EtK6`qj?-^VgopC>2Xq8V?ohrkANs%^43M--%^o}T>!co*1mW?0P zt@R?On~ooJ_Rr?>TKNbVaePmb4Gb(jg8&tpWcMpsbdMAiA5efHh7mqKw9^<8iRN$m znRxati&AUF7JhnX%gnqo!t*usxcX)w=8y)85g@7a@yZM|hsb&)(OF=H+Y2FaKb^ zIZO(}6Ju-LQfjVDn8uBc4pj+M4fGQiC@4p~)UDZxaCE-7RFsmROLp+RWg@G;>m<8O zHuuWtyWCMJ>?vHWo4BmOuGXaQNogORJGHr_^oLjw69VwwT+uW4PUV|%zN4$*Jx~Lh zOm!(?7TK|@wfzS#%uit42HiTGXZMX9ok}_Lw3ku1!ww= z-Cg0QzS!X||JWSqKCzrqnE@VFrNye6eQ?BRH~nHaF2aGF3s+vmHXfdyV^=N1(Q(e5 z`+glB`)Gh|VSrH?BkagzRu?JE-#=S}wO&6e2Jb?2Z1lI7b7L%yGZyRo zlzHe@uU(AAY}}P^OW2;fIiKqol))jy^MO9LC)r)}S!53*VT|o~miyd-=+E8ESR7@9 z_oA0Et#8)gCwidb#a*!^I$HSlvX{!~-S18(h7;Wd|6)hD;SE(=i$*3sSXuW?cHd?_ z#x8N$5pOW?`z%TU-0_%i0w45h)x=LYWD0mj3&sl__<+Mom(pA^E_9+^QF^->P|5J0 zOpM%CA=*|O47-yD6NC;3+*b&;CKe2{+XA@pnF3O`*h?4jk)hebj0veUxA-w>D3T3l z6Ip|5O7FWfDUPqopLlxi$fg+fGMbKarm+!Pw7eMEJW6Kn$dBe-?WY_+{6O<;0=alR zmsie9Yi7>{@gqe__C{PvNNmlcci&Dq-kKNBDN!kH#eo#o#9&B-P5U7 zq2XQTC7~7FxWO~9@4vHj&)KC!_Z_Yya(mHwsKx}b9Bimd?cnd zf0JS2g}lIlGqMUc^k!&pB!(ZMdz%@(g(;TFm+$yzu&cse_BZvb#%GlImC6VEWycOK z^{3W~F%~Jla}GD|#|EQCoUG2(Mi7ww*CC&i4NJf-?Mdzz~U=12B>h zHZg=;8dJhnRZ-yn65r2NQ4H^}s*(w=9oKtX&z+h@C3O?se_eurqcawXX4>RS`~10b z$$@Kwv652(Zkm02d)|$*y*qwjm*`vPtM3jERn-{{+F2GmWC?t+#7Fw!Bc1#SK_3YtRSBISjZvIh+9CR>LjP1Br zNUy2xlJ>|(#MU07>5f8zP~o-w;~YqY$_KB~+$|iE+;z`yOhW2jObn$LoHlzCdEO`e z<%{3eFPzVQK6W1rIm{K4LWGXu9TSInb6g`gyTfkKJs!|vgx8FSO=mPbmm!zSW}F>= zUV5dR=bRiUu(&X!ch2z=$ahJ)b6dib}lLGQg-l>nFmvYF>-8&`DU{YdPHvCKtuH| zni}bhj^<{B8Ue28X*CkGRhu}*x%}W&Zd01w_6HG8{z6CzeKR&<7frvMx0f-F{ajOs zxyjsx?a{*2i6>9}X|S=4(VM|CgC^=ufjDGcBZS?fAAWzu?_WS$-q5DWk~~@FMzO>2 zhUoTH|J+QyPRi3 zs_^$GXcT?Fn!(b1oS_WvksmlSYxmXmLlIr@A%z>r9aWWbUnF5grV0-rg%Qw5e~KXi zHt{xPc9^}C1M^Slm0xSi5zlLvh!s0q2?o_KQvP&}d-m~l?>$yav)vbJ5$@$B|8?5G z_da03K=_543WLxWBm8nLT1GO>d9$KxV-dx>@y8r6uXo>NJt2mf;g>&pXo;5` z!uQ@C#`Z`+*a^hh)t^(#tICiO-8y4(hPA4FzXjHpKC~r9syOR>5I%A*t=MSLR-jnY z!iv5g>IVUpXt1QO$Ow1FSEFfd=wN5!yEHKbxdkjvwB+DDfIt0=V{K)zx zE_5~z4sH(*G{QTkvF2ctQCDSL3nWZ z(#25Eo&I{o0qgr?2=V*EX!GZC70PVFCIk>&7}(HQT-rU>t!ACIyz|7`Ji@n~SiEC8 z>t?eKqK(cl=a?}T|6rGN)eDWX@#tVK+)>G6S8V(Y2MvBy`E-WwYN%^FX;6E`Ja6>B z6?N^PWY-7iskC()d*oD4C_&2>z=aGNK#RsV3Lu`ES7l~kfxBoj8S1*}GXqJ3~zCID+wjPUP6TP}@!gr4k`{e%f; zGh=a}`OvX)UOBoNgQ-1ek|yaU5Wt;~gO>?O3N!@V5pq(>3q0k(@w#1m3y#9v;_kB% z1M}(1(-(6?eX-BdJN3N%3DH%zS#CzLw$97^5u5$l3|mzS#|TdK zTsT1Yr|?40no&-1m9G9#(F5hj1PdD-;2=5}r+CtzJI!zZYIpicnJ7;0y5`3pT$!Kt zEZjzOX}%fV-T1q0#8S&DT@wT1YNxdDT{MVI0KEI^^5my`wc$dFkzkb4%m7*ow&eOM z8LG%AyGd?t3^uF& zK4Jtn;x;$K>{;508*k0@+vL=UIPt>ZEk#ix;f{9WNXR%4rJnf{bHB#*2+j|aF;HD# zM|;i)c6+Ar_?;2ZSl52u=qg$(L8z!^Y_cYM9>#sd^RD3zsi<8Bh8RT9g{HfJZrz_S zQ9#}b!!*Lo2QXrpzXT21C?7G=bi@NI{HR$cO6lA=k*K_^W96prw@wxQkcuF(Zt<~O zxpaUY4oCp|ekC>|A%OdhkB7v4Pi{s9CP_uKTZy=$+Ep3hmx;N?gHE+OEb41o@UtC* z5!j`--yINS)TA$#2k}e23Vjn2_x#19qBp`4ITX;hv;xZGc~?1Jh)RIZ&52Rzjc`NH zO_4)R;_NI|A+~C3FK)EPJ(1Iao>wX`fGErd85KWnUzw`OkVfr$*!$}I0=BURscidF z5ajGzAq7JGp^nX1&f1m8dA*zKVb44iN7}I_`xeA5710!zkivW2HX_*+u6&u7zYzO0 zegf+Y`io0LV`%V_q4fd^u@&B6SXc7)O;ade&6emAu<1lXBg%8Vt_obdR6B%hI#;v3 z6X2(v2=>S!D?JtAo+r9^uk5c)j@EQ8j?Qk5hRcrkxph>X^N*4WbS7G?*xu5RDKNHE zccGBxB?)CU{%h59(;jYcp+Fl$SC{53KL@a`3ocCu1K3W>o(09OmcZG$sQwBh>W~-A 
z`t83iLq7#0vGZFOPE}LPo2J*g2DCWhf*CH{_2p<&QRZQA&})Eh#5~-uW*9>0MiTSM z1an`1qO)rJQQ-bxqk;tx40!ZM#HTV4w`~z*+|XS$%K(!YLWen)H9RjuoB}YhomrR0gaPrQFNm zUS!WbRt#f&N(G^coSe1Gw=@=PKX)I`oraL3rsmz$=Q^C_etcoiOvVzr29(3CvZbOM zZO`Hpy)B3!<4~bF7C0z_;!2}k?jn}bqd3g{$O%&25tYaK>F8N_DYFDC32kxaY$$o8 zKRK{}`5@j=O??MKt>3$#*yaw~Qy9lzo(>m+8n?Qbc@_Id=M_2 zN`5bpf%Uz!8%SCNpG_$!Am$a;jj>jW(2@%ZEv zqzvK&Q2gGXysUJ}uvorXp|0hX;n%!~N96rYORGK^w=HNFmZ=;9TEz2@CgVOg%6RC- z!CtwtD=&FX#>6lnlf_atAx>+zMI?^Go1hm$-U}f3zsTk$oLRKRF-}yGd*J7){@3a0 zFihJKg%##%&>3T1U1Kr^R{>9P+PdIQ+z4mH?S7(OV8CNC(B(yA$_PVyKj;fWPJyX% z^CKy5Knkkqz_mkGiU>L@Tyw z175TYEz|JBZqbjhyb3u+07&3mqSb6!7#v8pf&fqADh+6c=()T-yzb;ok{$-M0ZaMh zrAs({3Yj30dTGCD`6{44!84{E%~(Sw zGwi;Q79-Wb)UhlwCXXCp*s9qkERp7RSn2^Qmb4InTf9#$|BHMp50G5VI>E0GuayC! zRc*n!kDJ!|Xoyhdi}w_nnshQ0u4IWK8&8rlX#W0f&c=gww`1jkEr`{5WKkV<-34us z^zA6;&7(6U(qd`?U1#n`E{Ee1S`^rlyEs2H0E&;qAQw&?@BL>qA+FLp; z=JFL!;-K7I9C;q7!5ngZ0ig5en9oh2Nh*xVmw=jAau}DE!D=F<>S}Mm1C&_{$79T0 zaBY;CAH2o6Cq+5VW_Z;K0eBqm(2llyhxa37TZ_C1Wn z$7mVwrPLK8V($m51)ww_>+wgy>}?T1E|5oDun}rE6dJ%rr$Ii~68PC2+k?Xa9nF=L zT8l6Ve4iUaV<>nQPGrRM9ytWS+5;HD?#;RjaFxUpieVb7GSjcHztMyB;YX8>2dM z$@dlJUN`G7cXq5S;}|16nCJ-0ILE&4UYcPH04ASaQH1GVVC=7DZ%9r)zVrxfr~)Gv zP%r3UJ0kQ4PD%a5ifXchCT1NH^`=R6TpX4Gw~Xo_lWWnz?}j9OuK!GeXY8t7{1LcNKW)8 zuR;oq6n84M5;qv~&)ulQI9x4yZV?S+^Qzd_Lhzyk>aPp7L=!OURjl!_AR_knl8czD zsgjkuXgm0w32Y8@bZ8K?l>$laGr01=gFk(f0|l6Q0w{TsUNEp;t2VU3J$FOHk4?XQ zVPZ&e6F!1y_>?S{SB)Ssa3Hi2*QQeVi4udu0XVeZQR2ThPYNnfe8mX!a@;9+H?c{&F5BJIJjQ7Wn$NI-yqzC4X!lN2n!m z&SuMPk08LXjIfX6#&0TB!~Vdh9dScRVfI8AF3&xM!mvW;g3JJXvcXV+ACma~9awXT zR8$FZCCdS(i6eF!(nR?e)Y{Ouu5_0q2pXlFNx!d9Pkpo;RQXLhQaD}CyNO=@jV1xy z_tTBJA5X7A3csza(oIm}h(KWpdE_Lv5eUW-=rG{|>k!T&Ha(G**c>eO^-ti@fG=#< z1E2d(4I#QifF~f#{S{6S#b7xvfZIjv)}pz=&96VPq1&zag`cocXLQQRu+mkCR{_vq zoI>!sL_0r2m_q(s7sevhR2WfmPfx#KGsb~AQRQle%jr(R^^^sqkdQ9!tOED9O=U0L zLvxRFikj`(PXcec$mbL_V~=LL%tsnQzXB){IEM*bSk&e7v2f%@QJNVxf!u0y&Y$S} z35_*`;3e<|b)X&8?1#J6F;pR7!=8Yp_vdDeDs2z=L9MQ z2NE46|9OV;2!8VmAKZX21FQUiu?xIJjN{7)Vktb6m?`)WFJ3lUv>-q76ii3KCy0Xb zI?A*T-3+hspL=q^pWA?qC?`AI`48IPzP48eN(nz(6kA(17`nxSvw6<~ZS!woalhQRt|Psxnq6AOSU?S6 z3nH_{f42I5*|ojuPw{XaaqI8W_xMCj$}0<|Bf2JU;4QcoqGgJxk*IC|kR9~!g0VuW zscQI1Yc#_Hy-JooCPL=M=yz?5`zTb3vFh`DyUi}(8LuKms_ghWLdErXoLsF&ZdSsG z(J40L^7O)O(WqkFxqx^WCFKoB@dzs{joGj83{QaAk4M;HlR%sVCc9IQljixA-#`qP zv!3zhqWn@~XGFhi(OaepB1k{nP!%4Ke2I5Y^pJQJ$9vpg+)>w6IJ*4ntKD2tUbEp5 z*?sbsi(E^u(33RrcOm%4Q;y$L3rMi1B9n;!yTG_=9YzFiJHnNA(T7ARQ zlf0uOO9&hw_vA7b2+|iqa6uNErrlz;4;`S1M~GK>MW!>*GqMfYR~9qWl!&>tkpP@O zw#f2r7*m2Qrn}L^&+(4eqy+}A-)TM8N)(j}*CoF*;wa}A6FAD6Fr}&Xx+VOT1*}`N z3T5x+E-#_AzUzg^v$=NPl%Ad$?qY$d>KH%>$R06qKifn4RmH&$Xo5+$0sfLx9A$%I z^lU(#)A!Ujr#ap3nIaa)s>sk$KOK@|15W$gTQt8`73>}x2wM)k;AL%&iJI-3-*3x! zLm@>v16gS&bx`s0xpv>lv1uz(Fr({h`#kwJo8yl2J<^P>cWMdXfkzXcM3xrSZ~H#G z8U#BRABe>#4x+oaaxHB@Vfww>9-Cy3B^R3;aMwqGldH4o^{WUJ6K?(4|42ZQEGhcc zv2vrw*x`oTJ4+cc+fB1WI;|p9KELOnVSbV&@a8pDD8Cu6D8-vuxQWTK7+`T)8JV5k zY!|kA8)B%UILuH9LeNYVSu^8e=B;*Icw9@Xja2h9GgyMq{B1v#E>)<3UFwyEeur%p zer;7b-reWt%`8w~(7+qdfM>wZCNkvBEl+KjDlzjW&cacq zjFwH(LljFcyJX8^haH(dVZ)r*hhE9OHV?Krv>)xJpxOlrUeZ$_tV&aDznazVN`>rR zbK8Zi6KNlkvfzycay6yJ5AQQ#<~e?EFH3}-d3haiH8-toA99qEILfat{7@r(&`D>0 zf8M|+jaQaDyO8MYGA%Oz6p)%dpxC7wS40c+!lU8z)_QH+)7X%}V&@WWoX(=q-_(kzJDmAq&X0_SmV?us_&VsA2`stQHslmWh)TRtH_8Eb^6~zNNx&@F zt^F}@90v987Fm*|--Sxaua_N5EM68JKSZEDt?D8n-6@J{#c^TY@;wkV0(z8!Bl!X2;=56nsM?o-XS>1wpOre_?mctH=$w2 z(A_(^C5dh=q_>!PKAV0TgkH0AnAL4wB7JhJfV)<>-P*FC?J-NHMej>DYUDf33CK( zU|FYwLugGuNgIdQJ98QF^S-|nD#;uN3Dk3RV)=c>0$7h4?!@QS>IzU! 
zHGxiem}H3!^+4g%0FLsIp0%6Na>q?&O8MrMO-d>E1RY7)-{>g1gBTtv(aacco`+~I zyO8yF9xf&qh%@e%Fv34(n74BczKLJ`+49jy{7nTKfq1pAz) zf{AFLhv#x&^aXTuew|ZnN?BR6ZnrJC?RsL+mkh6Sb*KHilbY{ZM*JhOL|pg090Sjg zk#S8#W3d^Xh82BV>_fmkW!Ro*S^#SAagdDZC;CeW#}`h;2|66c)RewDp=CN64X3QX z(%mq*Os(bVq%kX_&C9$r&<1?J?4nTVb6xYk82;!rp%Ow}sF^HUnHYJOO+PF#%)@cI zZut{NJP_gkeSQ4oQO!Yi!%I*WnVM24JqkP?r<6?x2YLy-^=>Y2tht!FX zENc|pl-xw}&nt}cGzEeu#Bl0DpeB}4Xp%q{Tj=Rr?+*X>uo&b@P2cs`I$(p85!+vr zUjA%mZuRA5;BU@Plk}#Bq+Tqx{F_>_GknfQnl3amU&TKa1_MIQU-~~2c|(NrTObBF zHzh5$+-$*9qIys;kH&*j;go)ZyyO4K6G`~V1B3gK8W@f!s=Nd)$JknD{HvhjD^um1 z)gg3;Per8{T`ofnrW;>b%!2o@uRBvP3EMVkOM_T>Y2%jHbMjaaz=m`{vuiMt5w{i6 zv>t_3rX-N=fNj-n@Zm&;_yv>#!dx;r_+>nDjghI;Xm~dU+eYWgq zYGAt_xnPL!eyuHQ?%ySl3kC=u)^-WNr?G6LPlfATcP@>NK}*>1R0>umQ#%#Z@xC>2>a@fFtagGZ zX+AhLo-6eH?K7D3>0r=rq~;79V9sJVz~eB>aJEzs{g82O{~xT~Y_`mWJmV!XXngSr z-aa+QZ>Pt!UWAI-K^A+Z&bTv{BWr%s&PKP_b<56nLVm$J$(sHHd{ND?eavXb8;B)m zqG;lK!gaE2(0>dnqB__9In;0{8*?bMo)Cw~U$Y+z$HZV&F{2NcJj;x(hJ2LahVe=T zGx==UHKC_qFHQVT(4nn2eumaqncJ5sk z;{!97mVvCOyKLDvp=WZte>e3@>t{iXHI0BP|#6A zp?TOzn~3y`{On6i71zTKHc}TBo5vO;2d+Fs6Q5wQ0jG3H?c0(QH4dq`+8aBCZR8n} zxROj3yVm5%eB|=mK9KP9jT#m_^+An0Y`QH?d?PVZkISp;Sb=;Dj=&0=|FIMtlMna! z+{_wubl0DQ^iluu$b1~64WUrs6=cNO*OW=NpkpCNned=}?-x*)o&(Yxjd-);VNti? zR7>ISyRTIsQ!_+@j&7NmC z+8;!rS+-aHUTHv~!FUUb@yy1WwJ@Q&iu?C&~43!aNGt7{lxJ!a15;zE8!`8%4E5q z!^1b_^Vbel`Fq5We$`&uPCVN~VKQmr6yZ5@rc1V&hI}HS@RSmgapLM%=%LF59XW|l zsu;-$C&pSzng^P$G{~SbMv6MlNizh=;^KZX1ds}3-ce4Od_v(10;NA!2f$4w@DGjgyL~K7`YrA|B1!k3Uqbh5$Bp>a;9Y^<_+@eBbNENZkwrW=h-a9AypSPb?W1{E{wjhvKw0>pX=SyEZb_-jslj+ap zMKFj0?%_H*_UT_2#3c(`NGeF^DR7uJfl;x!yL!@&w^2G1n8TrEdhKSDlL9?}yZ>|P z>O$mh@NrBIAU#bC;+sT=p$wmenpq-;qco`M?a_cQgg=gf&-2?E z9e7f^k52+3(YUhoTU(?eHXLUiWmDXdk7WeM?~m(ZpO9yTyMfkkwty zmc?mgudpj0N+jx=iB}=61{bu48<WA0!)x zzy-64E@0S~y|&2uo~wFs&cXs*;w%Rb{IceRghGqc3#VZHKHp8o9v7TTt_81+2=<}@ zL9A}9Hx$|}sY4YzqB@2O8O>E?ZZ||}oI0H&Il zQ1Hd*OD>xpi6#2BLf0}=B>HZ_$+@s)Eo3%fihEX)dn#0Wj@a8u6Dwz$1RbTsHNLH4 z^2|DJhQ{zmQ=M{sZnAQDe`dg6W~SUfv0ZDq>rIlFFedQs_N=qBV5P#uoc~l=)|G=# zK$`sz(LtOsFC`B^_PvmUXGOE1_#Y|XFf~;8*0*)5ZhHr%a$T+3iy$A@lqc)Cb;M50 zzYsrw2|6lN7iSC_M@j-T=oZ5hqLl)D;^}#{i#$6uT38K%ca)IV!Fb;r}<^!##wG)fH z3z%6nK5&V}9z1?;OM~PgUQxlg7@mjgQ)p$rZS!8vKsB?}uk{!^JMC=?0~F8I%0;n4 zQJui2ZAqxNWUc;CHZJA-jHFzG6xvX&M1_?#tS|@$09}*4-2n64z6zfRrCUKzxqes| z8Zaw*61lALrlxW<4IqHnh6}XgW3l|yd+C%rkwa;WD6llW$4Ac zrDkuZqBzPm=_wIPwomULN>)MM*Pz&J2cTO;6!_3ul)awIdp^3p=wvfF#08#~2AU=y zJ^#gtKSr*4LsDscngu4VxFn;Q=pt9lXMIlhK;dANko*Od=Uloa&2B*dZD?^;2@u$MV123+R zq271m!yNZO#-HTLgQ)vk(mr(*W_z!~ORDsi5%t^k8T zSBd4fKt7;zHe_KBNhR^LI~E!d_jxG5$mi1NI14wfup+?u^b}PBtsetc z6B*DR5LpL^%o}BI=YGYy_@u>#+9fKhw*KwjV9=b04+sE~+=IGbgJ5h%wd62l?n|iu zSZK+gBFK~@De=i2-1gLbL;!Q}p@+~l*b_tUJw20}O}Tgzxj8;=RXzuXeZF$63{qWm z%S#vw9;=t>j0eNMIZgobRtq5hAvXP8c*P?{CN~Bns5{WGaIE!Pvr?l23Re!igt9v0 z!u7Fn+#KZS;|K|3`2JXeXHot!lQFOF2$p{1d2;}Q(8~?DB!w)pVGy(&$04D-E;Nf7 z&&BlvGUcsifcZc_*(J0&T`L?3N_2UGY7aa;rQnK&prZ5} zv_x&=o~hIB;KC%)QcfuH)e(hm!il8lou_x`nbGL}%jIG}%&n6&x6uh(d9-HkV{24g zfAFtIiE9w*O;bIyN<2`a=zV|{2y9W#Cazz^4>PdH61^bX`t(w}F;OGl@s3pQ+L*mI zBg4~R+c())^nT3zqPTXW$EF+zoV-5OHJyQ<70YPBGEOLGSM8Zl>V6=VB&J_~etL!4 zyD)O56S{_}qLJ3V?Vg|a&I1c0!d=j)hl~X=I)g2hZxRVgH07Z zR7YNefh<0R_7lTt29eKRJ;09|Cq;vwx^Q3qMYTUANKEh^^7VUasl&a(DKXp#^S^0O z=&t$`3{odcaYx7;;_j3j1|;q-wRdNbxdIO60hcNddXqkUJV?H zqsF$5=flf&YX6WfR;-hUcVMC+BI6qX?=|Zx&AIAuZG+-U;l#ZL1;KM@%-%Y-bNzE6(gg3sNN7g!uw#wD!7}855R(^gd zkqaO(@OO|3hz^kBa%y2&7!)n=eYx+%&x+=35>_!R~-ax8{O&McVFrk>1!< z;^+$v=Y43mahmL_RDpVWHu>8~OI5wD6b=%6U*QA8c!Vc*<2f^ay3B+t(G5R?ZoI6N zf3Ce);P;~61i8#@(#U?>(?MFWnBj<(A=6s{ka`pWd}@n4v^n 
z25|t4Sph$rU~a1hDS3gRSgVig;6w<#CNc^4jNFIRjTr8Pd2kK4&v8{M;jRw?q@#Ro zcKDUk@iBBhcmnK8)j8+!5;NfH2(E+{LGj^&MnX43EoAi3*C?8*3S-6qE1( zFLdJDU{ejmAgnH^nr4ZQ?E+u3ni>UtLYN`*uj6{5fCIYJgy{?eF!=m8gH=1FwLSh-o*Dq(+~Bwu57FmMP5HajS8> z%!!}zV-t|SCqoMDi2C`qZ6CcZ40rB=FbemBIA1sVnqQ7r0AnfzE+Vn`x$;hOQy9>R zjIb-s3m?jK_?zbVz+9BLu1{4 z(*8n-{gR7A!)Uw4ir<9kqU7d;Y=7(6TpNd69QdscFmJ|g{Kqn@0M~wXFq^Z1zOLY( zqls|4%b^mvBnbSxYhVet)K6!8T+uavp3oD#!wIcEgkH5#5J0fo-g_-OF}G`?Av^*I zL{D&>u>pw*KlI)6G(IqZPmB`PZKX4=%XTj-^KRph)F^sv0TRN+6M6XNlCvwcZ$ft= zt3Ei>lon(ad#Y|08s4s`i%ji#N80rh9x1o|qtha_2AxTm~z@%GI`X>K7;ea&!!bJJ6(3)kIFxn?9Oj?A>>n z+d4m;E(mSNIZ?WJR2dzsCt9Wh>8*?r|3VmG6LKQfXcTC8aW4E${8jy#n1npM5+C4e z#E(}zW@P>ca$4w-B2))7%n_mux_gt=Q+fJMU*=~2`h)oq!DxmNDKnZTIfx*%FDg}& z-4a>B~(0=p1^9dyeN99l_T4ece~ z$gd+WkJkz~?a8;F&r~Uwx6X@!MvR%&~1v9R5*sYV?AW)iJnj; zR}{Jm#w%%X@`AQ5)T>UIo?iG~%n?h!Icm45 zEwy(~uRQP!)&)VSfBX`dvvbM9U$54W%tZUg$|p7+VR_Y<`fXU~mL)1QNtzcpyyoxR zo4fz&8cUsMQl_9tXkOP~F7LRsn=zj0=Pxv{;%1B>0lSbUi9T~4282&xv z5Y|(O(;~pF?@zPq-o2Uo(bv*}91_`j@M{<&cCv3U#G(9U(n%y{^45s5Nl99^qsgxH zNLOsBpNnKo&a084V5``;zk0ZaKT+2?BA^1>S!^$pCuIyyD~vNJk?nsEv90!=Jne#Y z`n483t7#&e?yQJ0g63FO@3ib1QN3{_nt2eL)H5^vn7-(dNi8$f5hEq~r%zAyl=n|Z z97O|$lb6JBQ|yrMEks_Ep%N9%b_3d^jpA|+fcNJk$xZ$pV_Xf>u3ZMT9GSAO<&9Hc zo4PdwWkKAcy0HxRv*H;IKz%k$)kMK#GjA^lo(gQOwhcKQyC!G7pXJp94=u+uq5j~$oxb$x_e1>&8d|r8 zq6wKy;nz5y4O=ynZ!anGx=LeW0p!wQdH{DMvnO3N+m|VyC6I?7-~qJB%GhY%#K-ZS zAAv_~*|0mS#Mu zM88CS67AkT>!J+zGY*xEz3U1c>N}xP)fxYkK;GgY;gEi zs?M#%#9g~;MV!PpK^310HkR(t9rumBODZq{AOI+0HJbFEvp{@85U*{dUp_XJ3^KvK zO*D(k2hB~n(;>X1hTSclQ>8yA%QxiaW)%Xt5S+aVrp@KuU2hl;RM)#ibM| zcF*ts>3+QHzVBKmIaxV#X7=pa``OQ)Jqh}{YQzLj2++{bh&9xe4bjlh(}CZ0csRg! zF)tS(aKneIzwkvvBkBL|gPzSpLXU<9M$=GMFb*g@DhkenIfh=#J@s81$uG(q6v21x z7qUyvz-{A&hG*h8hK~%#c@Tka@ zv}Z*I*-~6H2N8#kD^K(9@6STTyA}_}zc@Qh7UnOW>;>+=7%m)pJ-Kt=b8pmLOI^jS2FaDZgw73h9+rb!y;E0zsz!yFKus!My(xulN`3`@bnlgn2` zT!{UJSb`x>a^nEfND@GX@R{&EXcxEcTRQt+R~3{mIy_t{hA~U9c5@cTF-clW*3qn8 zQ(vVhP{)^YYDNQ(t8ihMgmo8TtQ7 z29=z8=x($0mF|20zrN9HVQsHS8YxDO0tBsG-$hG(r&dWQ@)*RGK>x+vGJ-_GKg{9o zKr=LM*jfKoZy5kO(&T}2o??z#sPw2GOzo4lgp2FoUQmC+*D}cZiGqx#sX>$XbUq6= zG>5kjaH~*$r&d?@>AqowgW-aWWO=c$p)ec5Oz)-n`BkGt9d#8~Xfs{-Gq_W%&M~f- zm_}jHbC*XN`|Q$$PZ-|d`2yS~uj05YmSK}c$IyE5$Gy;1S*XESB`2eNaVQ~??k-{U zx0e~^vZlrTX(oy6r3B>!9cyH-{DeMa4#mGV4gHIj?Aj-sz?h~N|20S z?slbngX1`J7Uh@1)=3YQO-%}x152w2%8C5vyr015m$qdq%faQXU+Y=ac2TpDc>Fq^%3z{1Y9bD}LBUya zt~lolt|Fijo+ywF7q#}ws`tjDce|v}htySQrNneLh>z{ox~Wm{bS7{|@hwLS{WS&L z(fak2e)qXJrU}?6{a}KbNJir(?~mIj41+9gho=u2$TDiNju1HF^vr4G=^NrKNlFCP zbE`3-OogVr)y^7;7O&v*$A>$xs*_(u%}Hx+D{}c?XRz20pJS&||9@3CtgSqxDhAay zX3Vr^3l4bKPQK-I9PtWY5nNR&mV~W)IahX~v7jsmp zhgpH;kB)e|m7`=JRj7h=rS9nsg(Ieage1be!pKjbqEb?%c8lo-gIUDGO^LfEv5&_t zcb&yGb2#;}o8>-|KSS7v8+O{fKilNgR67qFEX!m!QiL!UsMd?i5qdiaxa|hkG~!(s z`k~rctL6XhU6U<8!@d)Rqn%RRULC0`ekkQMN}srXO6PScMeUycw?XGC$#jWgX0%0$ zZ`HRf8qp_%U2;4O?aM?Aa$JX=c!=3IJ30j(#wz$Di-+r!ArLchNpCn;=J-NlfO>xJ z)ga%nu?y8ttV7|yq^SkgKj&938Vp9uIEz%pwm2fK4Z_-b(VvS+z+9HFc;^cqrA~XC ziW+eIF$7)?e9{%QJsRuVFF34l)$#z`Q}=yy)vp>IQ>JG#DvjCsgkQuwSt=E&hkIfY z0$gg64OCpoxnqz2Vv37sqza{??~2?Mw!)QHFtk<1uH|c?W6oc+kuX+TXv#Gu!LXrc z(tYwDzbg^*WWf&Rrd%@@95E}olq|SJ`q?eUc^@g5?pr~GqH&wM`@iS$e^MmK@XhlQ zWS-hj4V`4TAX3zYO~YG~PIVAgk~Tu;yi>|+tt_{TZ}QyQ#O&INCa#$pvAo-MBLb7x z%g|Bb8MhZijcoNZ;Kk%#suz)2Tb*Aw6vJD#n;4e2FXl#Bya@%Vn>fj8cxhP1n&-i( z2W-RNP9R3;0b+pRVE(~Ki!)&de3g9IMkK>5PNLt!FbAMW&o# zs31M3lIIi5(m(lVF*#h$P~xzQBZCxfTeux1PQOOGY=-F)EJCU6h%Uf z_oS&1mTY2x0s=`nxSyf3fRIZ3U1qSkovRmZ`$yFts#jL>+^vi z=1n74R>FuA3pO9;=rrQPZvVQ43w!h}dmFh^v`r<8)I7*g4?Q~nlEKQwgk11Wh5hX_ 
zg#Ffj1JtjD1S8K~=A=58Q6FJ5`*XYad#Kq7U|WAk33roUO$8cAWvgpjEv{q2gu~CO z{p=qB$hMBH6_!H_$X709v(Qv_ZILbrz$vFjw$>FstaC3Y+?J3(HS!q!cCP9`y+|aP zgDD62fME0)Z3It(S|9_#UHl+VnotO85Ky>O=~jv6&YU?+R?Z2kK+KG5wKl)%s|th& z+}qCJRJFo`f(&p>`<35Sh;MU=-`b`LJ^AB`mGvHrWl#8Sq1@s}g&#cke}*TLtPN%E z^_*|sW~t$y5q3MSVeOz#&#$($G<5CRl?xC46n3gx`29n0tsTQF)goRD6*~MLbl?kM zEn(z|evgbi@$*8K1#E^H zK*k$~me+uo_Uiei^MH6+mpT&`U-s$PM;?2y1^(^^R6&s5EzPm?)@Kv~;@`9{J*>sN z&Z-4~M%A?ufHk{E%oC-iKHL6R_DZr1Is8?hchPB)niL0#Vv)jJ`DpE(s(r@q-Si8;Clp1E`VklZ>LgLgP_I~fRzn`NpZdl;D z06(h0{oMjY2ny74)=7yB^%z%~Jitnh-`!aPOps)SdYBqm^8=;f8H$BSuG+v8AqV-u z@^>vy3(mU89%t#)!;}|)k$1h!n;UXN&G>8-`%`}hjNvur;~Q--OhTaGtoH|4?f54U zwkqPGW+bYRNLT=dL>L?)=*WEZb)!#SY%3g;Z`_f0I63KbH-;NWIQev27^Y5mc^98o zBCHT;yzx)4Ce5^xnre8EdlWMQ08C;)-~{AUhDidUK%G`|{P1e%!vgOsH=Kt&qMwhR zC*2wqnXHBynN|db8WdKUF}Zd>w3>&sg!U_tS_bEtU2Q!F%X2tix8S{DXeBuOzI+|F zxJ}Y8+b%*_AHv=9C(mrZyFaaLk1CsJ&TIi>U8^mFtPnI{YS1iiPl{!&2S65g55r8^aUr8baf-5+}r z^F{4S($mcM(#)N``%@n~eLA%xhfqumw%JxfV3JvDxzZP!CEZ8>9{xb1Eu0OWIR0vS zWWS!!y?Ml9{lZ#q#F`wE$*#g(1aXwf{JXB2qQ-TUp-!J!c^)+*l}{_JN#*KjEIo`M z$t0cIECDg_ANH+&o*tgzx@=~6eIUO;bnFbTVeN(}r;a_v{6IF2Gkoi|KE*K@a? zxn75*^bTe>O9U zKK>psVo&yq#IGbrs>}I}Qi)YrYQpHnoWzG5uGDIy!=urkInxZ6p!qq}iO~9NHC(tw z>bz1OEF;Ll+p^9yHQAltVD1ZnR^9`TJxu*3!Ik{l=Ve7;JX+s}6%KWT5t+Eo!H-tC4 zNoS#UunGXyYan5v$nYdbZTWXgjqcAH$W^8Uyhm)s&QrJdrt3O5qm^P(RAD6uF~{8( zG}k0=uVu%55cwAUIYiHg`KYO4G|&?FrCh<;TXLQZ!MC|V4E6Q5?5;)lCN){dBo*@M zg(aFTr@ER9h2tSBgR>HDJt}Xw(?P$ak2+U>(Bzm4)Q>r@hn0Swf0H0vH}rY?9ief# z-#B3%B<><{+~4I(V;{R7)DbR4J-@wz3CS)U`Gk>gb@$b~i+?bfO<5(+e*P`ley3Lg zT$4|;?>fZXF*`swTd99*|K>X{47%rb#?#7MEni7nPqM?BNo$~m=L~wG3NP6CX*79f zlDd3U=?8J>i>VS&5`HK{YJ6A{+)U!}Xr_)YU$!MYlNvgLT{JFXn1E-K1V3~PL0>!? z7|3g;c8w2PSq=rlD6!f<>$?)qPuoXO10-=7^0q(a+ozR}_wU?VJD2hq>CHI2$3!un zHv{a+;iE%Yd*ZCVD~Y?3M{o{bJT3mu!j0-_)1R5tFD-sD?IHs6+$lwE9Xj5>QSMVU zs$zX4JDp%3rNB-4w9@<tq{eArFylX+ZIi^W57pCD=jb)H-6Db;Yx^ocQY5;Br8B(S@ zhg@#iv~h>59&xHsTVn!fF`x5;~hP>8sT+l^%3+FvuGix zHvHvjad|gk%SM!ZinShb~M>l+$$CZmOS~7Ch`H}#IjMyV{BS_~Ey=lQe zG(?%unfklGQ^!;K?6(WZm@W0N)D1gFyf!G9^`n^)mf%jU^ogDKoy=Q;hYW4t7fX)P}; zr5?q)5t7zkqJa`{2aHoD^N*kLQCSiQLZPp%lxk>lBbU36BLgQkW9`<7q8rqjWdpZi zkBS@2;$A#5qWYq4;ED2PfsVB=xd#)yqzO2MpYmvDqy|K%%LPbFOU>gx<4LY}4FgizP3bl=IpU5}4?!vSxEZnLX zb#SHrJ{v04^8P`#H~hn^(Iji*fZ4-*-h+1=`O0>OYuWx-12a$oqR=5d_MfEL zsx@p~gT-9tsipy-Y!Dp-zX!nP%E&&hzUUVjYn{ALgJ!6eKs$4HUpVZ17wfJ%YQ2$Z zq=h)5zfGVv8+g<8h=A3`vrGjS~wn@_Pg;0qx>ChwgjzERaTiLB`#F71G}tjWF32;eorl` zo>r61>qqE3dS9%scs3`s`u8)Vu_Us&CM}lKRf7feeTDMsUkSrtlN$SA!c$_DK%{t# z4m&*qzYOu6=qU}WVomWuH)Dd6Ij~EGC{Ha7Gk7*NNkl^1hUTSIe^mFwI;w{mzg-qL z;5U#N>vRym{B^8CojNxuD`)OE_XmrQMp-C>Q|Eb{gs0iqfFzUn4(;ECE7aO)g=22^ zrHzt6INqIL!I?8;_?&AFk`f<}pBnMhJ zcCEZ({9eoBQ+@fYc^uZh{UPKQu}@ZS$!La}y)u)htMj!U`U#NnE~?AcZeDWmWPLcUzLoD$zK@nKl=Bg6dXn)Ak5HB-O(P}$N-ye}WKvvEIB zS{hJ^OLgwWP5kjx#p@tcK}CO7-~yR|6HAR*3(oe87Ukf#08~>3g{=_wudaI2p8BJB z>JD!~N}>O?={G&PcgSzhLb^*;cy#eO1Tbe?{LvjiYqsNrL~_{PwopYBL}~&4S-S7N z4mXbEOf<~#7`%#nEDfo;ic8l&+Qp|90)U^^knJCArBq-LVg14&Ln0jusTfS(GGNqz-W=nh7z7+BT@gLP{~2~tz65Y#S%RqH;d*~JfRGgGN>MVqi{Wo%{)2^TxJFpe1gGX^raIe#?M z`~akWSDi3W^!WdCRZxGoOC0??0BQmKzT539Qy^P;QX!K#^fqr&Go<9 zhPJ%L5;ssb68&G%M5PMikbQ12@J0${&e_l>e)~WWAdfTq79*y;WA!e=AQ8 z!jA%5eE_N@cy?ysK~MxXk{o#u!6r|VM>wwRz>ALko_PV^1O0(AfF^)p!F1}qOY#}K zi>3mMrwWns|L}9&W~vDkzz4_kAtqI>KUfgehRk~pgIb6e&}8sy(TnkKyW9pqZ`+y~ z33#!sB3DjskPPp~Mx`)_(3T^gp(MAh&NO~4a#ew%(q`4C-;wN1rg2>JwXkvGa$P$W%0prj7f}1 z1)Ls1feYnCIQ41FeS37oYqXCGTV{q)I}7+L_#rHKQN*I7 zZKS3jm@#1yGDG!K%J>GJuQA0jruGx1p1rG=KlIpQj!Xl$RI4@9(_$)WE69IQI;`a2ILD@-g5yB=I!{4 z{*y*6#~&JSlBT=+(liUd+?FoJd4_Xm!FQ4ys`|imO?Sydn!fIjar#MUw-$#%4_{-J 
zDqd(;1oenM5wN&U0CUeBDN=nJHp`1z9?WY|I(&;076H38pU7xU$2@MSDK_2hLf*tP zm#s?TBbNS|6gDnD5ZVQK{JF?reTScI7+XoyIG86670@?s`CcVKC{)JTx`93qvd`(~ zi^ohN>k8+Yb$y#Jvtdf^jG3#(nnXdFA@^f2kD=MYYq$1Qa#}xpQn!@w+95CBDY0w-N?D#ouPxF9EH~vTU4&peR=Exr&Zq z%ds&ne<2#yU1I|S^!82=Npv%CbOukjhCTH#G=9pC`0er2!|wT{jP%16Z)>gw!9mus z82I{|?N04&7ZgY8P!Dy0SL_LW_Cgy?ha9PPwxH`<;`Mk~?+#1qvR7I#W}|ze`&#TZ zQ3LBri#I1V6`t_)9z)tC+G*R!)zqj|6s(|iFmDR-;Vc)7pR1{@Tq%DzBR0Ojb=fSD z4V(`s`tK}27^+BtOj~lSh4j0aO{>r@K5E^Y{<+lLaVL4;qL=T)c-yk2b8-jswa_uS zcamFXql~zzW@pP)P?^xKO!+2~gE8#@y4%KQ#L&`TuCNB8bea~xR?sJE3awi);wtGM zJ&h3B720N$=sQD9VJV#65DYP5c#00n5XmIozJF}yUKbd=XS}P|ddQhj=okU{(l>V+ z?1gA6OCsv~WgWa3;i{55LfE7b%wMm{(O?7&6j%y+e2%h!zrQg?gEPw8pV_`pId?@> zk#!C1c)4@`wd_E<3jUSlOM&&3rtbwwwdIK*O@w|28?bm-3UExr95!QWvGAEq0nY|4)uAC|UvuKub)_VJdR@qd$I!FWG~teVT8*y8Nn zF0NY}r@GBckAp7S+rDUVMu9=qAAO<>r46{#zcI91klqO@N=gw(AGq~kn^na7L7E zh>-?s=Yc+Jgk6%*E(_{ezqx_fjJ_+s0UxFFmS<9N{p9JpMwP>Q#URl;*<+0`rgO!x z@#9-clxXVhK3e^cn9I3U>pfc%tVxwomV1)xVxip$x81~&PoDC54x(nxtDlEgBV7 z0&uQ9u!dwAxOT#{v4%_;;x!{am=x~2)2Bb1g9dAf0mbl-Ij1}n-++--fjQFSgPTRQUQLt$}DIG>uA!AR@ zRi8XJ6hyTUvyw((NN2-B@%Pp&p1C|<);!BK*)kzy0_00-IJEE4M|;IPB!tXRFA}ZL z0EG5u(S4e`)gYhvsTH$TqMkm3j&X4CRtaSZ1n=6<-(yI;%_JC%%)px88g~r&MFwF} zuf%1TMjoGQhuY6r=wr_qmztknLN)tiDJ|aHX1K=;-#u~g`>ob&{h?Rm%jg;GUR_}{ zaTN3KB#wy?XA?P=LO2;#JFdbh6KW1Qmht3eyO9N?%>9zELP5DyzP5XIj@M5F*gNo_ z7%oov!{*)Rl+cw|OkjM>v0sY8ZUpD0aK?UJRA9~A+{YXjWDmki4pO+e*eSPcFJ zsnHuNS2P2uBG>_MAksug8u^#!(K1qS!drK6Vz8@Vbb!UOLC4sU&p6WMvyBcAR-f3@ z#I6D{KN5@=6hZ0TPKGdjG<~9+75z6&(IZ&+Gy#;Yp4aEi?Eky_N0e-r*G`#tuz`3GsSQieum_LXyAYIu@RrJXB@eX?`1 zWcDDxnK+97z@yymm#a}X*FZLteKF&|bzzKn)iya^IvdWz;_J>6%UehhI?OR5sF zL0D4EJFlAQs6gJw5tB%l#Yc9^R?90wBrcxWQaPaktFtP^AP^u7I*7jAv73*Ga~NdSB!S7iu2RyJi+Z= zAAncK?5zBLMQwoQ2~W9Ktd0iT;}ymut2Nu)I_Sa3Rzwxz#z%b)e6*?q$`op15c( zc#vU*BI#YC-2*@S->(deQ33nwkN+hm+b`Q`G1~j`ej1Nf(m@ zCKyA5OP-1=ki81==q?f|jONbZTu>2VKWs%ND3HObD{HP@+%~Pqu(~p6FB>x0ocSzA z#V}7e>{_Dv@O{uv_u?T{9Am+>r=qAipuOEh(B-f{havMpnMgew9A9JUALnTz81I>< z^WJQ%bfo=n4`0h!VE*){xkwhI^QV(QJ`DGC{2$gp;IvuA;P%UiKj?4cv?WQPlA@`i zYbC?K(Sc$v;cccZ#E;vBMf?bUA~bl1(f%vtHI4$@0&}}4!FWJ6=M0u6sqS+0T*QLh z`_)Oauw2S^iwwh@c9>#!-B!9xe#Nlgp_YEPy5R|O(sn0lspI= zg#FPpq0#9>2n@PBH+h#fjQq_zFy3DusABtV* zd`nENsyODsVYPV`W)0RyLol|eUaJ_N5qOfs1&YXxOrz%UYUS>1vT?q`6zREG{@gcN zJ6~#Go?&MOn;n5I3Aqre0|{&e`+v#y|JNk*egood<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/quantms v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index dd125b4e..2ae0a86f 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -44,6 +44,28 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) +def sniff_format(handle): + """ + Detect the tabular format. + + Args: + handle (text file): A handle to a `text file`_ object. The read position is + expected to be at the beginning (index 0). + + Returns: + csv.Dialect: The detected tabular format. + + .. 
_text file: + https://docs.python.org/3/glossary.html#term-text-file + + """ + peek = read_head(handle) + handle.seek(0) + sniffer = csv.Sniffer() + dialect = sniffer.sniff(peek) + return dialect + + def check_sdrf(check_ms, sdrf): df = SdrfDataFrame.parse(sdrf) errors = df.validate(DEFAULT_TEMPLATE) diff --git a/conf/base.config b/conf/base.config index 6f9d0207..7648d2cd 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,8 +15,8 @@ process { memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 2 + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements diff --git a/conf/test_dia.config b/conf/test_dia.config index b6654bc6..666101ff 100644 --- a/conf/test_dia.config +++ b/conf/test_dia.config @@ -20,7 +20,6 @@ params { max_time = 48.h outdir = './results_dia' - tracedir = "${params.outdir}/pipeline_info" // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/dia_ci/PXD026600.sdrf.tsv' diff --git a/conf/test_full_dia.config b/conf/test_full_dia.config index 6c4f68f9..1cfdd16d 100644 --- a/conf/test_full_dia.config +++ b/conf/test_full_dia.config @@ -20,7 +20,6 @@ params { max_time = 48.h outdir = './results_dia_full' - tracedir = "${params.outdir}/pipeline_info" // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/dia_full/PXD004684.sdrf.tsv' diff --git a/conf/test_full_lfq.config b/conf/test_full_lfq.config index b93f1913..fe17eb44 100644 --- a/conf/test_full_lfq.config +++ b/conf/test_full_lfq.config @@ -20,7 +20,6 @@ params { max_time = 48.h outdir = "./results_lfq_full" - tracedir = "${params.outdir}/pipeline_info" // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/lfq_full/PXD001819.sdrf.tsv' diff --git a/conf/test_full_tmt.config b/conf/test_full_tmt.config index 99569fbe..d4b8469f 100644 --- a/conf/test_full_tmt.config +++ b/conf/test_full_tmt.config @@ -15,7 +15,6 @@ params { config_profile_description = 'Real full test dataset in isotopic labelling mode to check pipeline function and sanity of results' outdir = "./results_iso_full" - tracedir = "${params.outdir}/pipeline_info" max_cpus = 2 max_memory = 6.GB diff --git a/conf/test_lfq.config b/conf/test_lfq.config index 2e280626..cd2480ba 100644 --- a/conf/test_lfq.config +++ b/conf/test_lfq.config @@ -20,7 +20,6 @@ params { max_time = 48.h outdir = "./results_lfq" - tracedir = "${params.outdir}/pipeline_info" // Input data labelling_type = "label free sample" diff --git a/conf/test_localize.config b/conf/test_localize.config index 1129b2c7..3ed7a152 100644 --- a/conf/test_localize.config +++ b/conf/test_localize.config @@ -20,7 +20,6 @@ params { max_time = 1.h outdir = "./results_localize" - tracedir = "${params.outdir}/pipeline_info" // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/lfq_ci_phospho/test_phospho.sdrf' diff --git a/conf/test_tmt.config b/conf/test_tmt.config index 489ff442..7184f1a4 100644 --- a/conf/test_tmt.config +++ b/conf/test_tmt.config @@ -15,7 +15,6 @@ params { config_profile_description = 'Full test dataset in isotopic labelling mode to check pipeline function and sanity of results' outdir = "./results_iso" - tracedir = "${params.outdir}/pipeline_info" max_cpus = 2 
max_memory = 6.GB
diff --git a/docs/usage.md b/docs/usage.md
index 84cc751d..003f6f93 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -33,6 +33,29 @@ work            # Directory containing the nextflow working files
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```
 
+If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
+
+Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
+
+> āš ļø Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+
+The above pipeline run specified with a params file in yaml format:
+
+```bash
+nextflow run nf-core/quantms -profile docker -params-file params.yaml
+```
+
+with `params.yaml` containing:
+
+```yaml
+input: './samplesheet.csv'
+outdir: './results/'
+genome: 'GRCh37'
+<...>
+```
+
+You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+
 ### Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
@@ -49,6 +72,10 @@ First, go to the [nf-core/quantms releases page](https://github.com/nf-core/quan
 
 This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
 
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+
+> šŸ’” If you wish to share such a profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles.
+
 ## Core Nextflow arguments
 
 > **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
 
 ### `-profile`
 
 Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
 
-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
 
 > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
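The `sniff_format` helper added to `bin/check_samplesheet.py` above delegates the actual format detection to `csv.Sniffer` from the Python standard library. The following is a minimal, self-contained sketch of that call sequence; the sample data is invented for illustration, and the plain `read(2048)` stands in for the script's own `read_head` helper:

```python
import csv
from io import StringIO

# Stand-in for an on-disk samplesheet handle; tab-separated sample data.
handle = StringIO("a\tb\tc\n1\t2\t3\n4\t5\t6\n")

peek = handle.read(2048)  # emulates the template's read_head() helper
handle.seek(0)            # rewind so downstream parsing starts at index 0

dialect = csv.Sniffer().sniff(peek)
print(repr(dialect.delimiter))  # '\t' -> the file can now be parsed as TSV
```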
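Likewise, the `conf/base.config` change earlier in this patch rewrites the retry condition as `task.exitStatus in ((130..145) + 104)`. A Groovy range is inclusive at both ends, so the expression covers exit codes 130 through 145 plus 104. A quick Python sanity check of the equivalent set; the signal interpretations in the comments are conventional, not taken from the patch:

```python
# Groovy's (130..145) is inclusive on both ends, so the Python equivalent
# needs 146 as the exclusive stop value.
retryable = list(range(130, 146)) + [104]

assert 137 in retryable    # 128 + SIGKILL(9), the usual out-of-memory kill
assert 143 in retryable    # 128 + SIGTERM(15)
assert 1 not in retryable  # ordinary failures still stop the pipeline
```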
@@ -81,8 +108,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -102,102 +131,19 @@ Each step in the pipeline has a default set of requirements for number of CPUs, Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -#### For beginners - -A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. 
If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. - -#### Advanced option on process level - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers (advanced users) - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. 
Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19)
-2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
-3. Create the custom config accordingly:
-
-    - For Docker:
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.
 
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
-        }
-    }
-    ```
+### Custom Containers
 
-    - For Singularity:
+In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date.
 
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
-        }
-    }
-    ```
+To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.
 
-    - For Conda:
+### Custom Tool Arguments
 
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            conda = 'bioconda::pangolin=3.0.5'
-        }
-    }
-    ```
+A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.
 
-> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
+To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
 
 ### nf-core/configs
 
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
deleted file mode 100755
index 33cd4f6e..00000000
--- a/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,528 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. 
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index db89bb98..0cb27015 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/quantms pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -18,40 +20,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -60,14 +33,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) @@ -81,27 +46,12 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input sdrf to the pipeline e.g. '--input *.sdrf.csv'" - System.exit(1) - } - - // Check input has been provided - if (!params.outdir) { - log.error "Please provide an outdir to the pipeline e.g. '--outdir ./results'" - System.exit(1) - } - - if (params.tracedir == "null/pipeline_info") - { - log.error """Error: Your tracedir is `null/pipeline_info`, this means you probably set outdir in a way that does not affect the default - `\$params.outdir/pipeline_info` (e.g., by specifying outdir in a profile instead of the commandline or through a `-params-file`. - Either set outdir in a correct way, or redefine tracedir as well (e.g., in your profile).""" - System.exit(1) + Nextflow.error("Please provide an input sdrf to the pipeline e.g. '--input *.sdrf.csv'") } // check fasta database has been provided if (!params.database) { - log.error "Please provide an fasta database to the pipeline e.g. 
'--database *.fasta'" + Nextflow.error("Please provide an fasta database to the pipeline e.g. '--database *.fasta'") } } } diff --git a/lib/WorkflowQuantms.groovy b/lib/WorkflowQuantms.groovy index 0ae28af2..1bcd9a66 100755 --- a/lib/WorkflowQuantms.groovy +++ b/lib/WorkflowQuantms.groovy @@ -1,6 +1,8 @@ // // This file holds several functions specific to the workflow/quantms.nf in the nf-core/quantms pipeline // + +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowQuantms { @@ -10,8 +12,7 @@ class WorkflowQuantms { // public static void initialise(params, log) { if (!params.database) { - log.error "database file not specified with e.g. '--database *.fasta' or via a detectable config file." - System.exit(1) + Nextflow.error "database file not specified with e.g. '--database *.fasta' or via a detectable config file." } } @@ -42,15 +43,57 @@ class WorkflowQuantms { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+        // Uncomment function in methodsDescriptionText to render in MultiQC report
+        def reference_text = [
+                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+                "<li>Ewels, P., Magnusson, M., Lundin, S., & KƤller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047-3048. doi: /10.1093/bioinformatics/btw354</li>"
+            ].join(' ').trim()
+
+        return reference_text
+    }
+
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
         // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file
         def meta = [:]
         meta.workflow = run_workflow.toMap()
         meta["manifest_map"] = run_workflow.manifest.toMap()
 
+        // Pipeline DOI
         meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
         meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
+
+        // Tool references
+        meta["tool_citations"] = ""
+        meta["tool_bibliography"] = ""
+
+        // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
+        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+        //meta["tool_bibliography"] = toolBibliographyText(params)
+
+
         def methods_text = mqc_methods_yaml.text
 
         def engine = new SimpleTemplateEngine()
diff --git a/main.nf b/main.nf
index 8b15941a..6db1c8b3 100644
--- a/main.nf
+++ b/main.nf
@@ -17,6 +17,22 @@ nextflow.enable.dsl = 2
 ========================================================================================
 */
 
+include { validateParameters; paramsHelp } from 'plugin/nf-validation'
+
+// Print help message if needed
+if (params.help) {
+    def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+    def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+    def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
+    log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
+    System.exit(0)
+}
+
+// Validate input parameters
+if (params.validate_params) {
+    validateParameters()
+}
+
 WorkflowMain.initialise(workflow, params, log)
 
 /*
diff --git a/modules.json b/modules.json
index 011ee5f9..9bb0b423 100644
--- a/modules.json
+++ b/modules.json
@@ -7,12 +7,12 @@
         "nf-core": {
             "custom/dumpsoftwareversions": {
                 "branch": "master",
-                "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
                 "installed_by": ["modules"]
             },
             "multiqc": {
                 "branch": "master",
-                "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7",
                 "installed_by": ["modules"]
             }
         }
diff --git a/modules/local/openms/epifany/main.nf b/modules/local/openms/epifany/main.nf
index dbae49c7..afc6629c 100644
--- a/modules/local/openms/epifany/main.nf
+++ b/modules/local/openms/epifany/main.nf
@@ -2,8 +2,6 @@ process EPIFANY {
     label 'process_medium'
     label 'openms'
 
-    publishDir "${params.outdir}"
-
    conda "bioconda::openms=2.9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_0' :
diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf
index 4c061684..4ebb3fef 100644
--- a/modules/local/openms/proteomicslfq/main.nf
+++ b/modules/local/openms/proteomicslfq/main.nf
@@ -44,7 +44,7 @@ process PROTEOMICSLFQ {
         -quantification_method ${params.quantification_method} \\
         -targeted_only ${params.targeted_only} \\
         -mass_recalibration ${params.mass_recalibration} \\
-        -transfer_ids ${params.transfer_ids} \\
+        -transfer_ids ${params.transfer_ids == 'off' ? 
'false' : params.transfer_ids} \\ -protein_quantification ${params.protein_quant} \\ -alignment_order ${params.alignment_order} \\ ${decoys_present} \\ diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 3df21765..800a6099 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a0..c32657de 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 68f66bea..4b604749 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b27..f93b5ee5 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -37,7 +38,7 @@ output: description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: diff --git a/nextflow.config b/nextflow.config index 27d185eb..a7080a34 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,9 @@ params { database = null acquisition_method = null + // Input options + input = null + // Tools flags posterior_probabilities = 'percolator' add_decoys = false @@ -142,7 +145,7 @@ params { quantification_method = 'feature_intensity' targeted_only = true mass_recalibration = false - transfer_ids = 'false' + transfer_ids = 'off' alignment_order = 'star' add_triqler_output = false quantify_decoys = false @@ -199,7 +202,6 @@ params { // Boilerplate options outdir = null publish_dir_mode = 'copy' - tracedir = "${params.outdir}/pipeline_info" email = null email_on_fail = null plaintext_email = false @@ -207,21 +209,17 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'modules' - singularity_pull_docker_container = false // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = [:] config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -229,6 +227,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -241,16 +246,17 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// Load nf-core/quantms custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
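+// As a rough illustration only (a hypothetical URL, derived from the default
+// params.custom_config_base and params.custom_config_version declared above),
+// the commented include below would resolve to something like:
+//   https://raw.githubusercontent.com/nf-core/configs/master/pipeline/quantms.config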
+// try { +// includeConfig "${params.custom_config_base}/pipeline/quantms.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/quantms profiles: ${params.custom_config_base}/pipeline/quantms.config") +// } + + profiles { debug { process.beforeScript = 'echo $HOSTNAME' } - ebicluster { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - conda.createTimeout = '1 h' - conda.useMamba = true - process.executor = 'lsf' - } conda { conda.enabled = true docker.enabled = false @@ -258,17 +264,6 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false - conda.useMamba = true - } - micromamba { - conda.enabled = true - conda.useMicromamba = true - conda.createTimeout = '1 h' - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false } mamba { conda.enabled = true @@ -279,14 +274,17 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' @@ -294,37 +292,72 @@ profiles { singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' executor.cpus = 16 executor.memory = 60.GB } + micromamba { + conda.enabled = true + conda.useMicromamba = true + conda.createTimeout = '1 h' + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + ebicluster { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + conda.createTimeout = '1 h' + conda.useMamba = true + process.executor = 'lsf' + } test { includeConfig 'conf/test_lfq.config' } test_localize { includeConfig 'conf/test_localize.config' } test_tmt { includeConfig 'conf/test_tmt.config' } @@ -335,11 +368,8 @@ profiles { test_full_dia { includeConfig 'conf/test_full_dia.config' } test_full { includeConfig 'conf/test_full_lfq.config' } mambaci { includeConfig 'conf/mambaci.config' } -} -// Load module config after profile, so they can depend on overwritten input parameters specific for each profile. 
-// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +} // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. @@ -358,19 +388,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -379,7 +409,7 @@ manifest { homePage = 'https://github.com/nf-core/quantms' description = """Quantitative Mass Spectrometry nf-core workflow""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' + nextflowVersion = '!>=23.04.0' version = '1.2.0dev' doi = '10.5281/zenodo.7754148' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3ad8eab0..70f17c05 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,7 +14,10 @@ "properties": { "input": { "type": "string", + "format": "file-path", + "exists": true, "mimetype": "text/tsv", + "pattern": "^\\S+\\.(?:tsv|sdrf)$", "description": "URI/path to an [SDRF](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) file (.sdrf.tsv) **OR** [OpenMS-style experimental design](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/classOpenMS_1_1ExperimentalDesign.html#details) with paths to spectra files (.tsv)", "help_text": "Input is specified by using a path or URI to a PRIDE Sample to Data Relation Format file (SDRF), e.g. as part of a submitted and\nannotated PRIDE experiment (see [here](https://github.com/bigbio/proteomics-metadata-standard/tree/master/annotated-projects) for examples). Input files will be downloaded and cached from the URIs specified in the SDRF file.\nAn OpenMS-style experimental design will be generated based on the factor columns of the SDRF. The settings for the\nfollowing parameters will currently be overwritten by the ones specified in the SDRF:\n\n * `fixed_mods`,\n * `variable_mods`,\n * `precursor_mass_tolerance`,\n * `precursor_mass_tolerance_unit`,\n * `fragment_mass_tolerance`,\n * `fragment_mass_tolerance_unit`,\n * `fragment_method`,\n * `enzyme`\n You can also specify an [OpenMS-style experimental design](https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/classOpenMS_1_1ExperimentalDesign.html#details) directly (.tsv ending). In this case, the aforementioned parameters have to be specified or defaults will be used.", "fa_icon": "fas fa-file-csv" @@ -67,6 +70,10 @@ "properties": { "database": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/fasta", + "pattern": "^\\S+\\.(?:fasta|fa)$", "description": "The `fasta` protein database used during database search. 
*Note:* For DIA data, it must not contain decoys.", "fa_icon": "fas fa-file", "help_text": "Since the database is not included in an SDRF, this parameter always needs to be given to specify the input protein database\nwhen you run the pipeline. Remember to include contaminants (and decoys if not in DIA mode and if not added in the pipeline with [`--add_decoys`](#add_decoys))\n\n```bash\n--database '[path to fasta protein database]'\n```" @@ -807,9 +814,9 @@ }, "transfer_ids": { "type": "string", - "description": "Tries a targeted requantification in files where an ID is missing, based on aggregate properties (i.e. RT) of the features in other aligned files (e.g. 'mean' of RT). (**WARNING:** increased memory consumption and runtime). 'false' turns this feature off. (default: 'false')", - "default": "false", - "enum": ["false", "mean"], + "description": "Tries a targeted requantification in files where an ID is missing, based on aggregate properties (i.e. RT) of the features in other aligned files (e.g. 'mean' of RT). (**WARNING:** increased memory consumption and runtime). 'off' turns this feature off. (default: 'off')", + "default": "off", + "enum": ["off", "mean"], "fa_icon": "fas fa-list-ol" }, "targeted_only": { @@ -1128,7 +1135,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -1145,12 +1152,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -1174,6 +1183,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -1188,6 +1198,7 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, "hidden": true }, "hook_url": { @@ -1199,6 +1210,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -1220,13 +1232,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -1234,24 +1239,29 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
},
-        "singularity_pull_docker_container": {
+        "validationFailUnrecognisedParams": {
             "type": "boolean",
-            "description": "This parameter force singularity to pull the contain from docker instead of using the singularity image",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters fails when an unrecognised parameter is found.",
+            "default": false,
             "hidden": true,
-            "fa_icon": "fas fa-bacon"
+            "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
         },
-        "hostnames": {
-            "type": "string",
-            "description": "Institutional configs hostname.",
+        "validationLenientMode": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters in lenient mode.",
+            "default": false,
             "hidden": true,
-            "fa_icon": "fas fa-users-cog"
+            "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
         }
     }
 }
diff --git a/tower.yml b/tower.yml
new file mode 100644
index 00000000..787aedfe
--- /dev/null
+++ b/tower.yml
@@ -0,0 +1,5 @@
+reports:
+  multiqc_report.html:
+    display: "MultiQC HTML report"
+  samplesheet.csv:
+    display: "Auto-created samplesheet with collated metadata and FASTQ paths"
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index 04705ab1..533c0793 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -1,21 +1,19 @@
 /*
-========================================================================================
-    VALIDATE INPUTS
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    PRINT PARAMS SUMMARY
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
 
-// Validate input parameters
-WorkflowQuantms.initialise(params, log)
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)
 
-// TODO nf-core: Add all file path parameters for the pipeline to the list below
-// Check input path parameters to see if they exist
-def checkPathParamList = [ params.input ]
-for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation
 
-// Check mandatory parameters
-if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
+WorkflowQuantms.initialise(params, log)
 
 /*
 ========================================================================================
@@ -80,9 +78,12 @@ workflow QUANTMS {
     // SUBWORKFLOW: Read in samplesheet, validate and stage input files
     //
     INPUT_CHECK (
-        ch_input
+        file(params.input)
     )
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+    // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
+    // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
+    // ! 
There is currently no tooling to help you write a sample sheet schema // // SUBWORKFLOW: Create input channel @@ -173,7 +174,7 @@ workflow QUANTMS { workflow_summary = WorkflowQuantms.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowQuantms.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + methods_description = WorkflowQuantms.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() From f755690fa3f315932dad0a10458adf098882f38f Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 3 Sep 2023 00:15:44 -0700 Subject: [PATCH 038/113] updated example of cli run --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 6db1c8b3..27a20b57 100644 --- a/main.nf +++ b/main.nf @@ -23,7 +23,7 @@ include { validateParameters; paramsHelp } from 'plugin/nf-validation' if (params.help) { def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + def String command = "nextflow run ${workflow.manifest.name} --input input_files.sdrf.tsv --database ~/dbs/human_fasta.fasta -profile docker" log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) System.exit(0) } From 28f9773ec5141e6d66b5aa84944fa4c280df391b Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Thu, 7 Sep 2023 09:18:46 +0100 Subject: [PATCH 039/113] Fixing bug https://github.com/bigbio/quantms/issues/278 Fixing bug https://github.com/bigbio/quantms/issues/278 --- modules/local/openms/thirdparty/searchenginesage/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/openms/thirdparty/searchenginesage/main.nf b/modules/local/openms/thirdparty/searchenginesage/main.nf index 01b7abce..7307772e 100644 --- a/modules/local/openms/thirdparty/searchenginesage/main.nf +++ b/modules/local/openms/thirdparty/searchenginesage/main.nf @@ -24,6 +24,7 @@ process SEARCHENGINESAGE { outname = mzml_files.size() > 1 ? "out_${batch}" : mzml_files[0].baseName il_equiv = params.IL_equivalent ? 
"-PeptideIndexing:IL_equivalent" : "" + precursor_left = Double.parseDouble(meta.precursormasstolerance) """ SageAdapter \\ @@ -43,7 +44,7 @@ process SEARCHENGINESAGE { -precursor_tol_left ${-meta.precursormasstolerance} \\ -precursor_tol_right ${meta.precursormasstolerance} \\ -precursor_tol_unit $meta.precursormasstoleranceunit \\ - -fragment_tol_left ${-meta.fragmentmasstolerance} \\ + -fragment_tol_left ${-precursor_left} \\ -fragment_tol_right ${meta.fragmentmasstolerance} \\ -fragment_tol_unit $meta.fragmentmasstoleranceunit \\ -fixed_modifications ${meta.fixedmodifications.tokenize(',').collect{ "'${it}'" }.join(" ") } \\ From a190162d66ef8f9d9811d687707e695326bbd61c Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Thu, 7 Sep 2023 09:41:11 +0100 Subject: [PATCH 040/113] fix errors with Sage --- modules/local/openms/thirdparty/searchenginesage/main.nf | 3 +-- subworkflows/local/create_input_channel.nf | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/local/openms/thirdparty/searchenginesage/main.nf b/modules/local/openms/thirdparty/searchenginesage/main.nf index 7307772e..01b7abce 100644 --- a/modules/local/openms/thirdparty/searchenginesage/main.nf +++ b/modules/local/openms/thirdparty/searchenginesage/main.nf @@ -24,7 +24,6 @@ process SEARCHENGINESAGE { outname = mzml_files.size() > 1 ? "out_${batch}" : mzml_files[0].baseName il_equiv = params.IL_equivalent ? "-PeptideIndexing:IL_equivalent" : "" - precursor_left = Double.parseDouble(meta.precursormasstolerance) """ SageAdapter \\ @@ -44,7 +43,7 @@ process SEARCHENGINESAGE { -precursor_tol_left ${-meta.precursormasstolerance} \\ -precursor_tol_right ${meta.precursormasstolerance} \\ -precursor_tol_unit $meta.precursormasstoleranceunit \\ - -fragment_tol_left ${-precursor_left} \\ + -fragment_tol_left ${-meta.fragmentmasstolerance} \\ -fragment_tol_right ${meta.fragmentmasstolerance} \\ -fragment_tol_unit $meta.fragmentmasstoleranceunit \\ -fixed_modifications ${meta.fixedmodifications.tokenize(',').collect{ "'${it}'" }.join(" ") } \\ diff --git a/subworkflows/local/create_input_channel.nf b/subworkflows/local/create_input_channel.nf index 9499cc9b..b248c923 100644 --- a/subworkflows/local/create_input_channel.nf +++ b/subworkflows/local/create_input_channel.nf @@ -137,9 +137,9 @@ def create_meta_channel(LinkedHashMap row, is_sdrf, enzymes, files, wrapper) { meta.labelling_type = row.Label meta.fixedmodifications = row.FixedModifications meta.variablemodifications = row.VariableModifications - meta.precursormasstolerance = row.PrecursorMassTolerance + meta.precursormasstolerance = Double.parseDouble(row.PrecursorMassTolerance) meta.precursormasstoleranceunit = row.PrecursorMassToleranceUnit - meta.fragmentmasstolerance = row.FragmentMassTolerance + meta.fragmentmasstolerance = Double.parseDouble(row.FragmentMassTolerance) meta.fragmentmasstoleranceunit = row.FragmentMassToleranceUnit meta.enzyme = row.Enzyme From 9a797d44fcabcacbe769af32777bccc3a8146e78 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Thu, 7 Sep 2023 16:38:27 +0100 Subject: [PATCH 041/113] Update main.nf --- modules/local/openms/thirdparty/searchenginesage/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/openms/thirdparty/searchenginesage/main.nf b/modules/local/openms/thirdparty/searchenginesage/main.nf index 01b7abce..a9b671d8 100644 --- a/modules/local/openms/thirdparty/searchenginesage/main.nf +++ b/modules/local/openms/thirdparty/searchenginesage/main.nf @@ -1,6 +1,6 @@ 
process SEARCHENGINESAGE { tag "${metas.toList().collect{it.mzml_id}}" - label 'process_high' // we could make it dependent on the number of files + label 'process_medium' // we could make it dependent on the number of files conda "openms::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From 7b99c07165ab2742f6d8667480ff8b19fa51cf8f Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Thu, 7 Sep 2023 19:40:02 +0100 Subject: [PATCH 042/113] Update main.nf --- modules/local/openms/thirdparty/searchenginesage/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/openms/thirdparty/searchenginesage/main.nf b/modules/local/openms/thirdparty/searchenginesage/main.nf index a9b671d8..c1736aa2 100644 --- a/modules/local/openms/thirdparty/searchenginesage/main.nf +++ b/modules/local/openms/thirdparty/searchenginesage/main.nf @@ -26,6 +26,7 @@ process SEARCHENGINESAGE { il_equiv = params.IL_equivalent ? "-PeptideIndexing:IL_equivalent" : "" """ + export SAGE_LOG=trace SageAdapter \\ -in ${mzml_files} \\ -out ${outname}_sage.idXML \\ From 88ef3f3ec81cfbc72129726db0979a489a9616df Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 9 Sep 2023 21:45:36 -0700 Subject: [PATCH 043/113] fixed linting on decompress dotd nf file --- modules/local/decompress_dotd/main.nf | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/local/decompress_dotd/main.nf b/modules/local/decompress_dotd/main.nf index 3e091f39..de170d33 100644 --- a/modules/local/decompress_dotd/main.nf +++ b/modules/local/decompress_dotd/main.nf @@ -9,13 +9,13 @@ process DECOMPRESS { stageInMode { if (task.attempt == 1) { - if (executor == "awsbatch") { + if (executor == 'awsbatch') { 'symlink' } else { 'link' } } else if (task.attempt == 2) { - if (executor == "awsbatch") { + if (executor == 'awsbatch') { 'copy' } else { 'symlink' @@ -29,12 +29,12 @@ process DECOMPRESS { tuple val(meta), path(compressed_file) output: - tuple val(meta), path("*.d"), emit: decompressed_files - path "versions.yml", emit: version - path "*.log", emit: log + tuple val(meta), path('*.d'), emit: decompressed_files + path 'versions.yml', emit: version + path '*.log', emit: log script: - def prefix = task.ext.prefix ?: "${meta.mzml_id}" + String prefix = task.ext.prefix ?: "${meta.mzml_id}" """ function extract { @@ -42,14 +42,14 @@ process DECOMPRESS { echo "Usage: extract ." else if [ -f \$1 ]; then - case \$1 in - *.tar.gz) tar xvzf \$1 ;; - *.gz) gunzip \$1 ;; - *.tar) tar xvf \$1 ;; - *) echo "extract: '\$1' - unknown archive method" ;; - esac + case \$1 in + *.tar.gz) tar xvzf \$1 ;; + *.gz) gunzip \$1 ;; + *.tar) tar xvf \$1 ;; + *) echo "extract: '\$1' - unknown archive method" ;; + esac else - echo "\$1 - file does not exist" + echo "\$1 - file does not exist" fi fi } From bbd8f63f85f7c3fe32d55d69419ff8a7d64b6e8b Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez"
Date: Sat, 9 Sep 2023 21:48:51 -0700
Subject: [PATCH 044/113] fixed inverted rows error (code review)

---
 bin/diann_convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/diann_convert.py b/bin/diann_convert.py
index eaa31f52..18599022 100755
--- a/bin/diann_convert.py
+++ b/bin/diann_convert.py
@@ -1059,7 +1059,7 @@ def make_lookup_dict(self, report) -> Dict[str, Tuple[str, float]]:
         #           103588  NPVGYPLAWQFLR  Q9NZ08;Q9NZ08-2  0.000252
 
         out = {
-            row["Protein.Ids"]: (row["Global.PG.Q.Value"], row["Modified.Sequence"]) for _, row in grouped_df.iterrows()
+            row["Protein.Ids"]: (row["Modified.Sequence"], row["Global.PG.Q.Value"]) for _, row in grouped_df.iterrows()
         }
 
         return out
From 0b1102349111a776d9c10ce45598ff13f67377c0 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Sat, 9 Sep 2023 22:15:41 -0700
Subject: [PATCH 045/113] changed lookup key for the PRH best score

---
 bin/diann_convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/diann_convert.py b/bin/diann_convert.py
index 18599022..34e31645 100755
--- a/bin/diann_convert.py
+++ b/bin/diann_convert.py
@@ -621,7 +621,7 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df):
     logger.debug("Matching PRH to best search engine score...")
     score_looker = ModScoreLooker(report)
     out_mztab_PRH[["modifiedSequence", "best_search_engine_score[1]"]] = out_mztab_PRH.apply(
-        lambda x: score_looker.get_score(x["accession"]), axis=1, result_type="expand"
+        lambda x: score_looker.get_score(x["Protein.Ids"]), axis=1, result_type="expand"
     )
 
     logger.debug("Matching PRH to modifications...")
From bcc5295d466fc533dc4b0a27f448d24d54f456b2 Mon Sep 17 00:00:00 2001
From: "J. Sebastian Paez"
Date: Sun, 10 Sep 2023 01:50:19 -0700
Subject: [PATCH 046/113] fixed error that arose from fixing merge conflicts

---
 subworkflows/local/file_preparation.nf | 5 -----
 workflows/dia.nf                       | 4 +++-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf
index da9d03c6..20c4734b 100644
--- a/subworkflows/local/file_preparation.nf
+++ b/subworkflows/local/file_preparation.nf
@@ -70,10 +70,6 @@ workflow FILE_PREPARATION {
         ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.version)
         ch_results = ch_results.mix(THERMORAWFILEPARSER.out.mzmls_converted)
 
-        MZMLINDEXING( ch_branched_input_mzMLs.nonIndexedMzML )
-        ch_versions = ch_versions.mix(MZMLINDEXING.out.version)
-        ch_results = ch_results.mix(MZMLINDEXING.out.mzmls_indexed)
-
        ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle }
 
     // Extract qc data from .d files
@@ -85,7 +81,6 @@ workflow FILE_PREPARATION {
     ch_versions = ch_versions.mix(DOTD2MQC_INDIVIDUAL.out.version)
     ch_versions = ch_versions.mix(DOTD2MQC_AGGREGATE.out.version)
 
-
     // Convert .d files to mzML
     if (params.convert_dotd) {
         TDF2MZML( ch_branched_input.dotd )
diff --git a/workflows/dia.nf b/workflows/dia.nf
index f6b5eeab..506d646d 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -97,8 +97,10 @@ workflow DIA {
     // Order matters in DIANN, This should be sorted for reproducible results.
     // NOTE: I am getting here the names of the ms files, not the path.
     // Since the next step only needs the name (since it uses the cached .quant)
+    // Also note that I am converting to a file object here because when executing
+    // locally, every element in ch_result is a string, whilst on cloud it is a path. 
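+    // A minimal sketch of that normalization (hypothetical file names; assumes
+    // only Nextflow's built-in file() helper), kept here as commented examples:
+    //   assert file('work/ab/run01.mzML').getName() == 'run01.mzML'       // String input
+    //   assert file(file('work/ab/run01.mzML')).getName() == 'run01.mzML' // Path input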
     ch_result
-        .ms_file.map { msfile -> msfile.getName() }
+        .ms_file.map { msfile -> file(msfile).getName() }
         .collect()
         .set { ms_file_names }
     DIANNSUMMARY(ms_file_names, meta, ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library,
From 96b19b210e346a361133016461eaa83dec22ae8a Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Fri, 15 Sep 2023 09:22:07 +0100
Subject: [PATCH 047/113] update pmultiqc 0.0.19 -> 0.0.20

---
 modules/local/diannconvert/main.nf | 6 +++---
 modules/local/pmultiqc/main.nf     | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf
index beab3c96..44ebae27 100644
--- a/modules/local/diannconvert/main.nf
+++ b/modules/local/diannconvert/main.nf
@@ -2,11 +2,11 @@ process DIANNCONVERT {
     tag "$meta.experiment_id"
     label 'process_medium'
 
-    conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.19"
+    conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.20"
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.19--pyhdfd78af_0"
+        container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.20--pyhdfd78af_0"
    } else {
-        container "quay.io/biocontainers/pmultiqc:0.0.19--pyhdfd78af_0"
+        container "quay.io/biocontainers/pmultiqc:0.0.20--pyhdfd78af_0"
    }

     input:
diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf
index 0cbf679f..484cf39c 100644
--- a/modules/local/pmultiqc/main.nf
+++ b/modules/local/pmultiqc/main.nf
@@ -1,11 +1,11 @@
 process PMULTIQC {
     label 'process_high'
 
-    conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.19"
+    conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.20"
     if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.19--pyhdfd78af_0"
+        container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.20--pyhdfd78af_0"
    } else {
-        container "quay.io/biocontainers/pmultiqc:0.0.19--pyhdfd78af_0"
+        container "quay.io/biocontainers/pmultiqc:0.0.20--pyhdfd78af_0"
    }

     input:
From 6a78acb90e1b5991b292445f1f6e493713ded4e1 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Fri, 15 Sep 2023 09:23:11 +0100
Subject: [PATCH 048/113] fix small typo

---
 modules/local/openms/thirdparty/luciphoradapter/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/openms/thirdparty/luciphoradapter/main.nf b/modules/local/openms/thirdparty/luciphoradapter/main.nf
index b0fe4de6..616c40b6 100644
--- a/modules/local/openms/thirdparty/luciphoradapter/main.nf
+++ b/modules/local/openms/thirdparty/luciphoradapter/main.nf
@@ -17,7 +17,7 @@ process LUCIPHORADAPTER {
     path "*.log", emit: log
 
     script:
-    // The OpenMS adapters need the actuall jar file, not the executable/shell wrapper that (bio)conda creates
+    // The OpenMS adapters need the actual jar file, not the executable/shell wrapper that (bio)conda creates
     luciphor_jar = ''
    if (workflow.containerEngine || (task.executor == "awsbatch")) {
        luciphor_jar = "-executable \$(find /usr/local/share/luciphor2-*/luciphor2.jar -maxdepth 0)"
From a860ade5d655f9d50b7dbebaad71f3da83243b7d Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol
Date: Fri, 15 Sep 2023 15:06:43 +0100
Subject: [PATCH 049/113] do not remove a plot for all proteins in msstats_tmt.R

---
 bin/msstats_tmt.R | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/msstats_tmt.R b/bin/msstats_tmt.R index 61e1fb57..c975ac5a 100755 --- a/bin/msstats_tmt.R +++ b/bin/msstats_tmt.R @@ -814,7 +814,7 @@ processed.quant <- proteinSummarization(quant, method=args[8],remove_empty_chann reference_norm=reference_norm, remove_norm_channel=remove_norm_channel ) -dataProcessPlotsTMT(processed.quant, "ProfilePlot", width=12, height=12, which.Protein="all") +dataProcessPlotsTMT(processed.quant, "ProfilePlot", width=12, height=12, which.Protein="allonly") dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protein="allonly") lvls <- levels(as.factor(processed.quant$ProteinLevelData$Condition)) From 07a6ed2d36b467691f22910121e9c4fc98dc42e8 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 15 Sep 2023 07:45:53 -0700 Subject: [PATCH 050/113] updated pmultiqc version --- modules/local/pmultiqc/main.nf | 13 +++---------- subworkflows/local/file_preparation.nf | 6 +++--- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index c0e1fbee..10dd9c9b 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -1,11 +1,11 @@ process PMULTIQC { label 'process_high' - conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.19" + conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.20" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.19--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.20--pyhdfd78af_0" } else { - container "quay.io/biocontainers/pmultiqc:0.0.19--pyhdfd78af_0" + container "quay.io/biocontainers/pmultiqc:0.0.20--pyhdfd78af_0" } input: @@ -34,13 +34,6 @@ process PMULTIQC { echo ">>>>>>>>> Experimental Design <<<<<<<<<" cat results/*openms_design.tsv - # I attempted making this expression match prior - # to tabs but that does not seem to work (it might be a groovy escaping issue) - # and should be fixed when https://github.com/bigbio/pmultiqc/issues/80 - # gets solved. 
- # Current hack to attempt matching file stems and not file extensions - sed -i -E "s/((\\.tar)|(\\.gz)|(\\.tar\\.gz))//g" results/*openms_design.tsv - echo ">>>>>>>>> Experimental Design <<<<<<<<<" cat results/*openms_design.tsv diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf index 20c4734b..b8592031 100644 --- a/subworkflows/local/file_preparation.nf +++ b/subworkflows/local/file_preparation.nf @@ -25,12 +25,12 @@ workflow FILE_PREPARATION { ch_rawfiles .branch { dottar: WorkflowQuantms.hasExtension(it[1], '.tar') - dotgz: WorkflowQuantms.hasExtension(it[1], '.tar') + dotzip: WorkflowQuantms.hasExtension(it[1], '.zip') gz: WorkflowQuantms.hasExtension(it[1], '.gz') uncompressed: true }.set { ch_branched_input } - compressed_files = ch_branched_input.dottar.mix(ch_branched_input.dotgz, ch_branched_input.gz) + compressed_files = ch_branched_input.dottar.mix(ch_branched_input.dotzip, ch_branched_input.gz) DECOMPRESS(compressed_files) ch_versions = ch_versions.mix(DECOMPRESS.out.version) ch_rawfiles = ch_branched_input.uncompressed.mix(DECOMPRESS.out.decompressed_files) @@ -91,7 +91,7 @@ workflow FILE_PREPARATION { ch_results = indexed_mzml_bundle.mix(ch_branched_input.dotd) } - MZMLSTATISTICS( indexed_mzml_bundle ) + MZMLSTATISTICS(indexed_mzml_bundle) ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.mzml_statistics.collect()) ch_versions = ch_versions.mix(MZMLSTATISTICS.out.version) From c1048f9fc45b775a312f85b084e68521b54ef969 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Fri, 15 Sep 2023 16:51:03 +0100 Subject: [PATCH 051/113] improving ProfileTMT export plot --- bin/msstats_tmt.R | 34 +++++++++++++++++++++++++++++++- modules/local/msstatstmt/main.nf | 1 + nextflow.config | 3 ++- nextflow_schema.json | 6 ++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/bin/msstats_tmt.R b/bin/msstats_tmt.R index c975ac5a..31754f71 100755 --- a/bin/msstats_tmt.R +++ b/bin/msstats_tmt.R @@ -4,6 +4,26 @@ # License: Apache 2.0 # Author: Dai Chengxin, Julianus Pfeuffer, Yasset Perez-Riverol +# Rscript bin/msstats_tmt.R PDC000114.sdrf_openms_design_msstats_in.csv "pairwise" "" true true true \ +# sum msstats true true true PDC000114.sdrf_openms_design_msstats_in 0.05 false +# Parameters: +# 1. input csv file +# 2. contrast type: pairwise +# 3. control condition: "" +# 4. rmProtein_with1Feature: true or false +# 5. useUniquePeptide: true or false +# 6. rmPSM_withfewMea_withinRun: true or false +# 7. summarization method: sum or max +# 8. summarization method to protein-level: msstats(default) +# 9. global median normalization on peptide level data: true or false +# 10. remove norm channel: true or false +# 11. reference norm channel: true or false +# 12. output prefix: PDC000114.sdrf_openms_design_msstats_in +# 13. adjusted p-value threshold: 0.05 +# 14. 
generate profile plot: true or false (default) + + + require(MSstatsTMT) require(stats) require(gplots) @@ -799,6 +819,15 @@ if (length(args)<13) { args[13] <- 0.05 } +if (length(args) < 14) { + # Plot profile plot for all proteins + args[14] <- FALSE +} +plot_profile_all <- args[14] +if(typeof(plot_profile_all) == 'character'){ + plot_profile_all <- char_to_boolean[plot_profile_all] +} + csv_input <- args[1] contrast_str <- args[2] control_str <- args[3] @@ -814,7 +843,10 @@ processed.quant <- proteinSummarization(quant, method=args[8],remove_empty_chann reference_norm=reference_norm, remove_norm_channel=remove_norm_channel ) -dataProcessPlotsTMT(processed.quant, "ProfilePlot", width=12, height=12, which.Protein="allonly") +if (plot_profile_all) { + dataProcessPlotsTMT(processed.quant, "ProfilePlot", width=12, height=12, which.Protein="all") +} + dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protein="allonly") lvls <- levels(as.factor(processed.quant$ProteinLevelData$Condition)) diff --git a/modules/local/msstatstmt/main.nf b/modules/local/msstatstmt/main.nf index 4e6b60b1..42bb10c9 100644 --- a/modules/local/msstatstmt/main.nf +++ b/modules/local/msstatstmt/main.nf @@ -39,6 +39,7 @@ process MSSTATSTMT { ${params.msstatsiso_reference_normalization} \\ ${msstatstmt_csv_input.baseName} \\ ${params.msstats_threshold} \\ + ${params.msstats_plot_profile_qc} \\ $args \\ 2>&1 | tee msstats_tmt.log diff --git a/nextflow.config b/nextflow.config index 6f2f3aa4..06dd2bca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -188,6 +188,7 @@ params { msstatsiso_global_norm = true msstatsiso_remove_norm_channel = true msstatsiso_reference_normalization = true + msstats_plot_profile_qc = false // PTXQC enable_qc = false @@ -221,7 +222,7 @@ params { config_profile_description = null config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten diff --git a/nextflow_schema.json b/nextflow_schema.json index 60aadf10..70c48be3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1032,6 +1032,12 @@ "description": "Reference channel based normalization between MS runs on protein level data", "fa_icon": "far fa-check-square", "default": true + }, + "msstats_plot_profile_qc": { + "type": "boolean", + "description": "Export MSstats profile QC plots including all proteins", + "fa_icon": "far fa-check-square", + "default": false } }, "fa_icon": "fab fa-r-project" From 5f6c7c1676e646816f3ffc0facc3044d087fe624 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 16 Sep 2023 12:08:45 +0100 Subject: [PATCH 052/113] included the QCPlot --- bin/msstats_tmt.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/msstats_tmt.R b/bin/msstats_tmt.R index 31754f71..a9e56092 100755 --- a/bin/msstats_tmt.R +++ b/bin/msstats_tmt.R @@ -845,9 +845,10 @@ processed.quant <- proteinSummarization(quant, method=args[8],remove_empty_chann if (plot_profile_all) { dataProcessPlotsTMT(processed.quant, "ProfilePlot", width=12, height=12, which.Protein="all") + dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protein="allonly") } -dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protein="allonly") + lvls <- levels(as.factor(processed.quant$ProteinLevelData$Condition)) l <- length(lvls) From f35a4316647333431ee8b927c27ed304fa97d46e Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 16 Sep 2023 12:11:33 +0100 Subject: [PATCH 053/113] msstats 
change. --- bin/msstats_tmt.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/msstats_tmt.R b/bin/msstats_tmt.R index a9e56092..c6b6dbff 100755 --- a/bin/msstats_tmt.R +++ b/bin/msstats_tmt.R @@ -848,8 +848,6 @@ if (plot_profile_all) { dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protein="allonly") } - - lvls <- levels(as.factor(processed.quant$ProteinLevelData$Condition)) l <- length(lvls) From d50529b07857a79fbf17c1961f8c632e9b88357e Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 18 Sep 2023 06:17:08 +0100 Subject: [PATCH 054/113] update sdrf-pipelines 0.0.24 --- modules/local/preprocess_expdesign.nf | 6 +++--- modules/local/samplesheet_check.nf | 6 +++--- modules/local/sdrfparsing/main.nf | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/local/preprocess_expdesign.nf b/modules/local/preprocess_expdesign.nf index 53aa4ba5..a04ee22f 100644 --- a/modules/local/preprocess_expdesign.nf +++ b/modules/local/preprocess_expdesign.nf @@ -6,11 +6,11 @@ process PREPROCESS_EXPDESIGN { tag "$design.Name" label 'process_low' - conda "bioconda::sdrf-pipelines=0.0.23" + conda "bioconda::sdrf-pipelines=0.0.24" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.23--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0" } else { - container "quay.io/biocontainers/sdrf-pipelines:0.0.23--pyhdfd78af_0" + container "quay.io/biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0" } input: diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index e70a8e54..eb5b5467 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -3,11 +3,11 @@ process SAMPLESHEET_CHECK { tag "$input_file" label 'process_single' - conda "bioconda::sdrf-pipelines=0.0.23" + conda "bioconda::sdrf-pipelines=0.0.24" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.23--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0" } else { - container "quay.io/biocontainers/sdrf-pipelines:0.0.23--pyhdfd78af_0" + container "quay.io/biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0" } input: diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index e5c86480..8a7f4df0 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -2,10 +2,10 @@ process SDRFPARSING { tag "$sdrf.Name" label 'process_low' - conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.23" + conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.24" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.23--pyhdfd78af_0' : - 'quay.io/biocontainers/sdrf-pipelines:0.0.23--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0' : + 'quay.io/biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0' }" input: path sdrf From b72e00034c4d701b2b823cb3f2b70efe2abce3d6 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 19 Sep 2023 09:54:20 -0700 Subject: [PATCH 055/113] Maintainance/finish integration with bigbio (#5) * changed files to paths in final diann analysis * updated decompression to support zip * added exception when the decompressed file already matches the required pattern --- bin/dotd_2_mqc.py | 24 +++++++-------- modules/local/decompress_dotd/main.nf | 11 +++++-- .../local/individual_final_analysis/main.nf | 16 +++++----- modules/local/pmultiqc/main.nf | 3 -- modules/local/sdrfparsing/main.nf | 30 +++++++++---------- 5 files changed, 42 insertions(+), 42 deletions(-) diff --git a/bin/dotd_2_mqc.py b/bin/dotd_2_mqc.py index dc868b09..66764018 100755 --- a/bin/dotd_2_mqc.py +++ b/bin/dotd_2_mqc.py @@ -177,7 +177,7 @@ def get_acquisition_datetime(self) -> str: raise RuntimeError("More than one acquisition datetime found.") return out[0][0] - + def get_tot_current(self) -> float: """Gets the total current from the ms1 scans. @@ -198,7 +198,7 @@ def get_tot_current(self) -> float: raise RuntimeError("More than one total current found.") return out[0][0] - + def get_dia_scan_current(self) -> float: """Gets the total current from the ms2 scans. @@ -294,26 +294,26 @@ def main_single(input_path, output_path): main_aggregate(output_path, output_path) logger.info("Done.") - + def main_aggregate(input_path, output_path): # Find the general stats files if not input_path.is_dir(): logger.error(f"Input path {input_path} is not a directory.") raise ValueError("Input path must be a directory.") - + general_stats_files = list(input_path.glob("general_stats_*.tsv")) if not general_stats_files: logger.error(f"No general stats files found in {input_path}.") raise ValueError("No general stats files found.") - + # Merge them to a single table # Effectively transposing the columns and adding column called file, # which contains the file name from which the stats were acquired. 
logger.info("Merging general stats files.") general_stats = [] for f in general_stats_files: - curr_stats = {'file': f.stem.replace("general_stats_", "")} + curr_stats = {"file": f.stem.replace("general_stats_", "")} with f.open("r") as fh: for line in fh: line = line.strip() @@ -323,7 +323,7 @@ def main_aggregate(input_path, output_path): curr_stats[k] = v general_stats.append(curr_stats) - + # Write the general stats file logger.info("Writing general stats file.") with (output_path / "general_stats.tsv").open("w") as f: @@ -337,19 +337,19 @@ def main_aggregate(input_path, output_path): parser = argparse.ArgumentParser(add_help=True, usage=GENERAL_HELP) parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") subparsers = parser.add_subparsers(required=True) - + # create the parser for the "single" command - parser_foo = subparsers.add_parser('single') + parser_foo = subparsers.add_parser("single") parser_foo.add_argument("input", help="Input .d file or directory of .d files.") parser_foo.add_argument("output", help="Output directory.") parser_foo.set_defaults(func=main_single) - + # create the parser for the "aggregate" command - parser_bar = subparsers.add_parser('aggregate') + parser_bar = subparsers.add_parser("aggregate") parser_bar.add_argument("input", help="Directory that contains the general stats files to aggregate.") parser_bar.add_argument("output", help="Output directory.") parser_bar.set_defaults(func=main_aggregate) - + # parse the args and call whatever function was selected args, unkargs = parser.parse_known_args() if unkargs: diff --git a/modules/local/decompress_dotd/main.nf b/modules/local/decompress_dotd/main.nf index de170d33..e96ef7fa 100644 --- a/modules/local/decompress_dotd/main.nf +++ b/modules/local/decompress_dotd/main.nf @@ -2,10 +2,9 @@ process DECOMPRESS { tag "$meta.mzml_id" label 'process_low' - label 'process_single' label 'error_retry' - container 'continuumio/miniconda3:23.5.2-0-alpine' + container 'ghcr.io/jspaezp/miniconda-alpine-zip:v23.5.2-0-alpine' stageInMode { if (task.attempt == 1) { @@ -46,6 +45,7 @@ process DECOMPRESS { *.tar.gz) tar xvzf \$1 ;; *.gz) gunzip \$1 ;; *.tar) tar xvf \$1 ;; + *.zip) unzip \$1 ;; *) echo "extract: '\$1' - unknown archive method" ;; esac else @@ -56,16 +56,21 @@ process DECOMPRESS { tar --help 2>&1 | tee -a ${prefix}_decompression.log gunzip --help 2>&1 | tee -a ${prefix}_decompression.log + zip --help 2>&1 | tee -a ${prefix}_decompression.log echo "Unpacking..." 
| tee -a ${compressed_file.baseName}_decompression.log extract ${compressed_file} 2>&1 | tee -a ${compressed_file.baseName}_conversion.log - mv *.d ${file(compressed_file.baseName).baseName}.d + [ -d ${file(compressed_file.baseName).baseName}.d ] && \\ + echo "Found ${file(compressed_file.baseName).baseName}.d" || \\ + mv *.d ${file(compressed_file.baseName).baseName}.d + ls -l | tee -a ${compressed_file.baseName}_decompression.log cat <<-END_VERSIONS > versions.yml "${task.process}": gunzip: \$(gunzip --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?") tar: \$(tar --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?") + zip: \$(zip --help | head -2 | tail -1 | grep -oE "\\d+\\.\\d+") END_VERSIONS """ } diff --git a/modules/local/individual_final_analysis/main.nf b/modules/local/individual_final_analysis/main.nf index 3c8abf1d..b1d26be4 100644 --- a/modules/local/individual_final_analysis/main.nf +++ b/modules/local/individual_final_analysis/main.nf @@ -1,5 +1,5 @@ process INDIVIDUAL_FINAL_ANALYSIS { - tag "$mzML.baseName" + tag "$ms_file.baseName" label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,7 +7,7 @@ process INDIVIDUAL_FINAL_ANALYSIS { 'biocontainers/diann:v1.8.1_cv1' }" input: - tuple val(meta), file(mzML), file(fasta), file(diann_log), file(library) + tuple val(meta), path(ms_file), path(fasta), path(diann_log), path(library) output: path "*.quant", emit: diann_quant @@ -30,23 +30,21 @@ process INDIVIDUAL_FINAL_ANALYSIS { } """ - # Question: why is this using echo? wouldnt just the variable replacement do the same? - diann --lib ${library} \\ - --f ${mzML} \\ + --f ${ms_file} \\ --fasta ${fasta} \\ --threads ${task.cpus} \\ --verbose $params.diann_debug \\ --temp ./ \\ - --mass-acc \$(echo ${mass_acc_ms2}) \\ - --mass-acc-ms1 \$(echo ${mass_acc_ms1}) \\ - --window \$(echo ${scan_window}) \\ + --mass-acc ${mass_acc_ms2} \\ + --mass-acc-ms1 ${mass_acc_ms1} \\ + --window ${scan_window} \\ --no-ifs-removal \\ --no-main-report \\ --relaxed-prot-inf \\ --pg-level $params.pg_level \\ $args \\ - 2>&1 | tee ${mzML.baseName}_final_diann.log + 2>&1 | tee ${ms_file.baseName}_final_diann.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 10dd9c9b..84587bfb 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -34,9 +34,6 @@ process PMULTIQC { echo ">>>>>>>>> Experimental Design <<<<<<<<<" cat results/*openms_design.tsv - echo ">>>>>>>>> Experimental Design <<<<<<<<<" - cat results/*openms_design.tsv - echo ">>>>>>>>> Running Multiqc <<<<<<<<<" multiqc \\ -f \\ diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 8bed7d64..ea209ae8 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -2,10 +2,10 @@ process SDRFPARSING { tag "$sdrf.Name" label 'process_low' - conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.23" + conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.24" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.23--pyhdfd78af_0' : - 'quay.io/biocontainers/sdrf-pipelines:0.0.23--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0' : + 'quay.io/biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0' }" input: path sdrf @@ -18,26 +18,26 @@ process SDRFPARSING { script: def args = task.ext.args ?: '' - // TODO Read the `convert_dotd` parameter and dispatch parameters accprdingly + if (params.convert_dotd) { + extensionconversions = ",.d.gz:.mzML,.d.tar.gz:.mzML,d.tar:.mzML,.d.zip:.mzML,.d:.mzML" + } else { + extensionconversions = ",.gz:,.tar.gz:,.tar:,.zip:" + } """ ## -t2 since the one-table format parser is broken in OpenMS2.5 ## -l for legacy behavior to always add sample columns - - parse_sdrf convert-openms -t2 -l --extension_convert raw:mzML -s ${sdrf} 2>&1 | tee ${sdrf.baseName}_parsing.log + + parse_sdrf convert-openms \\ + -t2 -l \\ + --extension_convert raw:mzML$extensionconversions \\ + -s ${sdrf} \\ + $args \\ + 2>&1 | tee ${sdrf.baseName}_parsing.log mv openms.tsv ${sdrf.baseName}_config.tsv mv experimental_design.tsv ${sdrf.baseName}_openms_design.tsv - # Adding here the removal of the .tar, since DIANN takes the .d directly - # all logs from the tool match only the .d suffix. so it is easier to - # remove it here than doing the forensic tracking back of the file. - sed -i -e "s/((.tar)|(.tar.gz))\\t/\\t/g" ${sdrf.baseName}_openms_design.tsv - - # TODO: since I added support for .gz ... how are we aliasing? - # if someone packs a .d in a .gz (not .d.gz or .d.tar.gz), how should we - # know what extension to keep? - cat <<-END_VERSIONS > versions.yml "${task.process}": sdrf-pipelines: \$(parse_sdrf --version 2>&1 | awk -F ' ' '{print \$2}') From 4e5fba8cb58d766d8561be947708f33828c4f3ae Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 13:07:52 +0100 Subject: [PATCH 056/113] update some process to openms dev --- modules/local/openms/msstatsconverter/main.nf | 6 +++--- modules/local/openms/proteininference/main.nf | 6 +++--- modules/local/openms/proteinquantifier/main.nf | 6 +++--- modules/local/openms/proteomicslfq/main.nf | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/local/openms/msstatsconverter/main.nf b/modules/local/openms/msstatsconverter/main.nf index 283ca494..7aad40ee 100644 --- a/modules/local/openms/msstatsconverter/main.nf +++ b/modules/local/openms/msstatsconverter/main.nf @@ -2,10 +2,10 @@ process MSSTATSCONVERTER { tag "$exp_file.Name" label 'process_low' - conda "bioconda::openms=2.9.1" + conda "openms::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_0' : - 'quay.io/biocontainers/openms:2.9.1--h135471a_0' }" + 'ghcr.io/openms/openms-executables-sif:latest' : + 'ghcr.io/openms/openms-executables:latest' }" input: path consensusXML diff --git a/modules/local/openms/proteininference/main.nf b/modules/local/openms/proteininference/main.nf index 3276403e..962b2366 100644 --- a/modules/local/openms/proteininference/main.nf +++ b/modules/local/openms/proteininference/main.nf @@ -1,10 +1,10 @@ process PROTEININFERENCE { label 'process_medium' - conda "bioconda::openms=2.9.1" + conda "openms::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_0' : - 'quay.io/biocontainers/openms:2.9.1--h135471a_0' }" + 'ghcr.io/openms/openms-executables-sif:latest' : + 'ghcr.io/openms/openms-executables:latest' }" input: tuple val(meta), path(consus_file) diff --git a/modules/local/openms/proteinquantifier/main.nf b/modules/local/openms/proteinquantifier/main.nf index b2815d97..a1598835 100644 --- a/modules/local/openms/proteinquantifier/main.nf +++ b/modules/local/openms/proteinquantifier/main.nf @@ -2,10 +2,10 @@ process PROTEINQUANTIFIER { tag "${pro_quant_exp.baseName}" label 'process_medium' - conda "bioconda::openms=2.9.1" + conda "openms::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_0' : - 'quay.io/biocontainers/openms:2.9.1--h135471a_0' }" + 'ghcr.io/openms/openms-executables-sif:latest' : + 'ghcr.io/openms/openms-executables:latest' }" input: path epi_filt_resolve diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index 4e0e3b65..5267520e 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -2,10 +2,10 @@ process PROTEOMICSLFQ { tag "${expdes.baseName}" label 'process_high' - conda "bioconda::openms=2.9.1" + conda "openms::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.9.1--h135471a_0' : - 'quay.io/biocontainers/openms:2.9.1--h135471a_0' }" + 'ghcr.io/openms/openms-executables-sif:latest' : + 'ghcr.io/openms/openms-executables:latest' }" input: path(mzmls) From 39df6feb3559660c57e7390eecc5d88d3265f986 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 13:13:06 +0100 Subject: [PATCH 057/113] pmultiqc 0.0.20 -> 0.0.21 --- modules/local/diannconvert/main.nf | 6 +++--- modules/local/pmultiqc/main.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf index 44ebae27..d4583dcc 100644 --- a/modules/local/diannconvert/main.nf +++ b/modules/local/diannconvert/main.nf @@ -2,11 +2,11 @@ process DIANNCONVERT { tag "$meta.experiment_id" label 'process_medium' - conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.20" + conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.21" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.20--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.21--pyhdfd78af_0" } else { - container "quay.io/biocontainers/pmultiqc:0.0.20--pyhdfd78af_0" + container "quay.io/biocontainers/pmultiqc:0.0.21--pyhdfd78af_0" } input: diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 484cf39c..b42a1bb3 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -1,11 +1,11 @@ process PMULTIQC { label 'process_high' - conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.20" + conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.21" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container 
"https://depot.galaxyproject.org/singularity/pmultiqc:0.0.20--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.21--pyhdfd78af_0" } else { - container "quay.io/biocontainers/pmultiqc:0.0.20--pyhdfd78af_0" + container "quay.io/biocontainers/pmultiqc:0.0.21--pyhdfd78af_0" } input: From 6ffced143c8db589c45aeba8641ef0c8c304e3c3 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 13:18:40 +0100 Subject: [PATCH 058/113] msstats and msstats versions increased --- modules/local/msstats/main.nf | 6 +++--- modules/local/msstatstmt/main.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/msstats/main.nf b/modules/local/msstats/main.nf index 0ca36197..435fb7ce 100644 --- a/modules/local/msstats/main.nf +++ b/modules/local/msstats/main.nf @@ -2,11 +2,11 @@ process MSSTATS { tag "$msstats_csv_input.Name" label 'process_medium' - conda "bioconda::bioconductor-msstats=4.2.0" + conda "bioconda::bioconductor-msstats=4.8.3" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/bioconductor-msstats:4.2.0--r41h619a076_1" + container "https://depot.galaxyproject.org/singularity/bioconductor-msstats:4.8.3--r43hf17093f_0" } else { - container "quay.io/biocontainers/bioconductor-msstats:4.2.0--r41h619a076_1" + container "quay.io/biocontainers/bioconductor-msstats:4.8.3--r43hf17093f_0" } input: diff --git a/modules/local/msstatstmt/main.nf b/modules/local/msstatstmt/main.nf index 42bb10c9..f749392d 100644 --- a/modules/local/msstatstmt/main.nf +++ b/modules/local/msstatstmt/main.nf @@ -2,11 +2,11 @@ process MSSTATSTMT { tag "$msstatstmt_csv_input.Name" label 'process_medium' - conda "bioconda::bioconductor-msstatstmt=2.2.0" + conda "bioconda::bioconductor-msstatstmt=2.8.0" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/bioconductor-msstatstmt:2.2.0--r41hdfd78af_0" + container "https://depot.galaxyproject.org/singularity/bioconductor-msstatstmt:2.8.0--r43hdfd78af_0" } else { - container "quay.io/biocontainers/bioconductor-msstatstmt:2.2.0--r41hdfd78af_0" + container "quay.io/biocontainers/bioconductor-msstatstmt:2.8.0--r43hdfd78af_0" } input: From 223c62fd56e07d85a5067408ea19643cec3f5d28 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 13:38:59 +0100 Subject: [PATCH 059/113] - disable read idxml in pmultiqc - remove PTXQC from workflow --- modules/local/pmultiqc/main.nf | 2 ++ nextflow.config | 7 +++---- nextflow_schema.json | 13 ++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index b42a1bb3..7941e84d 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -23,6 +23,7 @@ process PMULTIQC { def args = task.ext.args ?: '' def disable_pmultiqc = (params.enable_pmultiqc) && (params.export_mztab) ? "" : "--disable_plugin" def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" + def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored_idxml" : "" """ multiqc \\ @@ -31,6 +32,7 @@ process PMULTIQC { ${args} \\ ${disable_pmultiqc} \\ ${disable_table_plots} \\ + ${disable_idxml_index} \\ --quantification_method $params.quantification_method \\ ./results \\ -o . 
diff --git a/nextflow.config b/nextflow.config index 06dd2bca..8c10edca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,6 @@ params { // Tools flags posterior_probabilities = 'percolator' add_decoys = false - enable_pmultiqc = true search_engines = 'comet' sage_processes = 1 run_fdr_cutoff = 0.10 @@ -190,9 +189,9 @@ params { msstatsiso_reference_normalization = true msstats_plot_profile_qc = false - // PTXQC - enable_qc = false - ptxqc_report_layout = null + // pmultiqc options + enable_pmultiqc = true + pmultiqc_idxml_skip = true // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 70c48be3..1cbf8405 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1048,19 +1048,14 @@ "description": "", "default": "", "properties": { - "enable_qc": { + "enable_pmultiqc": { "type": "boolean", - "description": "Enable generation of quality control report by PTXQC? default: 'false' since it is still unstable", + "description": "Enable generation of pmultiqc report? default: 'false'", "fa_icon": "fas fa-toggle-on" }, - "ptxqc_report_layout": { - "type": "string", - "description": "Specify a yaml file for the report layout (see PTXQC documentation) (TODO not yet fully implemented)", - "fa_icon": "far fa-file" - }, - "enable_pmultiqc": { + "pmultiqc_idxml_skip": { "type": "boolean", - "description": "Enable generation of pmultiqc report? default: 'false'", + "description": "Skip idXML files (do not generate search engine scores) in pmultiqc report? default: 'true'", "fa_icon": "fas fa-toggle-on" } }, From ec5b3aec47c0677627d8d7acb3bdb5e24ec57c15 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 14:49:27 +0100 Subject: [PATCH 060/113] id_transfer_threshold included for MBR in proteomicsLFQ --- modules/local/openms/proteomicslfq/main.nf | 1 + nextflow.config | 1 + nextflow_schema.json | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index 5267520e..1b20d4dd 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -30,6 +30,7 @@ process PROTEOMICSLFQ { script: def args = task.ext.args ?: '' def msstats_present = params.quantification_method == "feature_intensity" ? "-out_msstats ${expdes.baseName}_msstats_in.csv" : "" + def id_transfer_threshold = (params.quantification_method == "feature_intensity") && (!params.targeted_only) ? "-id_transfer_threshold ${params.id_transfer_threshold}" : "" def triqler_present = (params.quantification_method == "feature_intensity") && (params.add_triqler_output) ? "-out_triqler ${expdes.baseName}_triqler_in.tsv" : "" def decoys_present = (params.quantify_decoys || ((params.quantification_method == "feature_intensity") && params.add_triqler_output)) ? 
'-PeptideQuantification:quantify_decoys' : '' def mzml_sorted = mzmls.collect().sort{ a, b -> a.name <=> b.name} diff --git a/nextflow.config b/nextflow.config index 8c10edca..cb2db83e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -154,6 +154,7 @@ params { alignment_order = 'star' add_triqler_output = false quantify_decoys = false + id_transfer_threshold = 0.50 // only used if targeted_only is set to false (default) // DIA-NN diann_debug = 3 diff --git a/nextflow_schema.json b/nextflow_schema.json index 1cbf8405..f2d51d3c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -834,6 +834,12 @@ "default": true, "fa_icon": "far fa-check-square" }, + "id_transfer_threshold": { + "type": "number", + "description": "The maximum percentage of samples that will be accepted for matching between runs (default: 0.5)", + "default": 0.5, + "fa_icon": "fas fa-filter" + }, "alignment_order": { "type": "string", "description": "The order in which maps are aligned. Star = all vs. the reference with most IDs (default). TreeGuided = an alignment tree is calculated first based on similarity measures of the IDs in the maps.", From 8f454d4bc84849ce8da45c62723fd1aa17d8a499 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 15:11:31 +0100 Subject: [PATCH 061/113] transfer_ids instead of targeted_only --- modules/local/openms/proteomicslfq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index 1b20d4dd..a290ad1a 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -30,7 +30,7 @@ process PROTEOMICSLFQ { script: def args = task.ext.args ?: '' def msstats_present = params.quantification_method == "feature_intensity" ? "-out_msstats ${expdes.baseName}_msstats_in.csv" : "" - def id_transfer_threshold = (params.quantification_method == "feature_intensity") && (!params.targeted_only) ? "-id_transfer_threshold ${params.id_transfer_threshold}" : "" + def id_transfer_threshold = (params.quantification_method == "feature_intensity") && (transfer_ids != "off") ? "-id_transfer_threshold ${params.id_transfer_threshold}" : "" def triqler_present = (params.quantification_method == "feature_intensity") && (params.add_triqler_output) ? "-out_triqler ${expdes.baseName}_triqler_in.tsv" : "" def decoys_present = (params.quantify_decoys || ((params.quantification_method == "feature_intensity") && params.add_triqler_output)) ? '-PeptideQuantification:quantify_decoys' : '' def mzml_sorted = mzmls.collect().sort{ a, b -> a.name <=> b.name} From 1e449638cf9573e51365d6851b10d7d027713a1d Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 25 Sep 2023 16:05:43 +0100 Subject: [PATCH 062/113] transfer_ids instead of targeted_only --- modules/local/openms/proteomicslfq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index a290ad1a..8841a682 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -30,7 +30,7 @@ process PROTEOMICSLFQ { script: def args = task.ext.args ?: '' def msstats_present = params.quantification_method == "feature_intensity" ? "-out_msstats ${expdes.baseName}_msstats_in.csv" : "" - def id_transfer_threshold = (params.quantification_method == "feature_intensity") && (transfer_ids != "off") ? 
"-id_transfer_threshold ${params.id_transfer_threshold}" : "" + def id_transfer_threshold = (params.quantification_method == "feature_intensity") && (params.transfer_ids != "off") ? "-id_transfer_threshold ${params.id_transfer_threshold}" : "" def triqler_present = (params.quantification_method == "feature_intensity") && (params.add_triqler_output) ? "-out_triqler ${expdes.baseName}_triqler_in.tsv" : "" def decoys_present = (params.quantify_decoys || ((params.quantification_method == "feature_intensity") && params.add_triqler_output)) ? '-PeptideQuantification:quantify_decoys' : '' def mzml_sorted = mzmls.collect().sort{ a, b -> a.name <=> b.name} From be5cd3d09317c119df3c891d79f0afa64a68342b Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Mon, 25 Sep 2023 15:17:22 +0000 Subject: [PATCH 063/113] Template update for nf-core/tools version 2.10 --- .devcontainer/devcontainer.json | 1 + .github/CONTRIBUTING.md | 4 +- .github/workflows/linting.yml | 2 +- .github/workflows/release-announcments.yml | 68 +++++++++ CITATIONS.md | 2 +- CODE_OF_CONDUCT.md | 133 ++++++++++++++---- README.md | 21 +-- assets/multiqc_config.yml | 4 +- conf/modules.config | 9 ++ docs/output.md | 5 +- docs/usage.md | 16 ++- lib/NfcoreTemplate.groovy | 16 +++ lib/WorkflowQuantms.groovy | 2 +- main.nf | 3 + modules.json | 6 +- .../custom/dumpsoftwareversions/main.nf | 2 +- modules/nf-core/fastqc/main.nf | 8 +- modules/nf-core/multiqc/main.nf | 2 +- nextflow.config | 7 +- nextflow_schema.json | 15 -- workflows/quantms.nf | 1 + 21 files changed, 251 insertions(+), 76 deletions(-) create mode 100644 .github/workflows/release-announcments.yml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..4ecfbfe3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 357f3d98..01fbda84 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)). 
+::: ## Contribution workflow diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4bc..b8bdd214 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 00000000..6ad33927 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/CITATIONS.md b/CITATIONS.md index 35dfcbad..596f9031 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,7 +12,7 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. 
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris TuƱƭ, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris TuƱƭ, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. 
-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. 
- Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares šŸ˜Š -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community memberā€™s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis šŸŽ‰ šŸ„³ šŸ’Æ šŸ™Œ !) +- Celebrate your accomplishments! (Get creative with your use of emojis šŸŽ‰ šŸ„³ šŸ’Æ šŸ™Œ !) - Demonstrate empathy towards other community members. (We donā€™t all have the same amount of time to dedicate to nf-core. If tasks are pending, donā€™t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so letā€™s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (Weā€™ve all been outsiders and we know that talking to strangers can be hard for some, but remember weā€™re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we donā€™t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we donā€™t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on šŸ˜• -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - ā€œDoxingā€ i.e. posting (or threatening to post) another personā€™s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable ā€” reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris TuƱƭ, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. 
+ +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager ā€” possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participantsā€™ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. 
+ +## Enforcement + +Actions taken by the nf-coreā€™s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted ā€” this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 9950f05e..1af22a17 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # ![nf-core/quantms](docs/images/nf-core-quantms_logo_light.png#gh-light-mode-only) ![nf-core/quantms](docs/images/nf-core-quantms_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions CI Status](https://github.com/nf-core/quantms/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/quantms/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/quantms/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/quantms/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -29,10 +30,11 @@ ## Usage -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. 
+:::note +If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +with `-profile test` before running the workflow on actual data. +:::
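
Taken together, the patches in this series expose three new user-facing parameters: `msstats_plot_profile_qc`, `pmultiqc_idxml_skip`, and `id_transfer_threshold`. A hypothetical invocation that sets them explicitly might look like the sketch below; the SDRF and FASTA paths are placeholders, `--msstats_plot_profile_qc` is switched on purely for illustration, and the other two values restate the defaults introduced in `nextflow.config`.

```bash
# Hypothetical quantms run; experiment.sdrf.tsv and proteome.fasta are
# placeholder inputs. msstats_plot_profile_qc is enabled for illustration,
# while the other two parameters restate the new nextflow.config defaults.
nextflow run nf-core/quantms \
    -profile docker \
    --input experiment.sdrf.tsv \
    --database proteome.fasta \
    --msstats_plot_profile_qc true \
    --pmultiqc_idxml_skip true \
    --id_transfer_threshold 0.50
```
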