From 692a2d680537179e2e2df66b97988d8d4e97eef0 Mon Sep 17 00:00:00 2001 From: hechth Date: Wed, 18 Oct 2023 16:08:06 +0200 Subject: [PATCH 1/4] removed matchms similarity wrapper --- tools/matchms/matchms_similarity.xml | 196 -------------------- tools/matchms/matchms_similarity_wrapper.py | 136 -------------- 2 files changed, 332 deletions(-) delete mode 100644 tools/matchms/matchms_similarity.xml delete mode 100644 tools/matchms/matchms_similarity_wrapper.py diff --git a/tools/matchms/matchms_similarity.xml b/tools/matchms/matchms_similarity.xml deleted file mode 100644 index ce4f7204..00000000 --- a/tools/matchms/matchms_similarity.xml +++ /dev/null @@ -1,196 +0,0 @@ - - calculate the similarity score and matched peaks - - - macros.xml - help.xml - - - - - - spec2vec - matchms - - - - - - \$_GALAXY_JOB_TMP_DIR - - - - - python3 ${__tool_directory__}/matchms_similarity_wrapper.py \ - #if $ri_filtering.is_true == "TRUE" - -r $ri_filtering.tolerance \ - #end if - #if $symmetric.is_symmetric == "TRUE" - -s \ - #else - --ref "$references" \ - --ref_format "$references.ext" \ - #end if - --array_type "$array_type" \ - #if $metric.similarity_metric == "Spec2Vec" - --spec2vec_model "$metric.model_metadata" \ - --spec2vec_weights "$metric.model_weights" \ - --allow_missing_percentage $metric.algorithm.allow_missing_percentage \ - #end if - "$queries" \ - "$queries.ext" \ - "$metric.similarity_metric" \ - #if $metric.similarity_metric == "Spec2Vec" - 0 \ - 0 \ - #else - $metric.algorithm.tolerance \ - $metric.algorithm.mz_power \ - #end if - $metric.algorithm.intensity_power \ - "$similarity_scores" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - -
-
-
- - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @HELP_matchms@ - - - -
diff --git a/tools/matchms/matchms_similarity_wrapper.py b/tools/matchms/matchms_similarity_wrapper.py deleted file mode 100644 index 26bfe44f..00000000 --- a/tools/matchms/matchms_similarity_wrapper.py +++ /dev/null @@ -1,136 +0,0 @@ -import argparse -import json -import sys - -from matchms import calculate_scores -from matchms.importing import load_from_mgf, load_from_msp -from matchms.similarity import (CosineGreedy, CosineHungarian, MetadataMatch, - ModifiedCosine, NeutralLossesCosine) -from spec2vec import Spec2Vec -from spec2vec.serialization.model_importing import load_weights, Word2VecLight - - -def convert_precursor_mz(spectrum): - """ - Check the presence of precursor m/z since it is needed for ModifiedCosine similarity metric. Convert to float if - needed, raise error if missing. - """ - - if "precursor_mz" in spectrum.metadata: - metadata = spectrum.metadata - metadata["precursor_mz"] = float(metadata["precursor_mz"]) - spectrum.metadata = metadata - return spectrum - else: - raise ValueError("Precursor_mz missing. Apply 'add_precursor_mz' filter first.") - - -def load_model(model_file, weights_file) -> Word2VecLight: - """ - Read a lightweight version of a :class:`~gensim.models.Word2Vec` model from disk. - - Parameters - ---------- - model_file: - A path of json file to load the model. - weights_file: - A path of `.npy` file to load the model's weights. - - Returns - ------- - :class:`~spec2vec.serialization.model_importing.Word2VecLight` – a lightweight version of a - :class:`~gensim.models.Word2Vec` - """ - with open(model_file, "r", encoding="utf-8") as f: - model: dict = json.load(f) - del (model["mapfile_path"]) - - weights = load_weights(weights_file, model["__weights_format"]) - return Word2VecLight(model, weights) - - -def main(argv): - parser = argparse.ArgumentParser(description="Compute MSP similarity scores") - parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.") - parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") - parser.add_argument("--array_type", type=str, help="Type of array to use for storing scores (numpy or sparse).") - parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.") - parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.") - parser.add_argument("--spec2vec_model", dest="spec2vec_model", type=str, help="Path to spec2vec model.") - parser.add_argument("--spec2vec_weights", dest="spec2vec_weights", type=str, help="Path to spec2vec weights.") - parser.add_argument("--allow_missing_percentage", dest="allowed_missing_percentage", type=lambda x: float(x) * 100.0, help="Maximum percentage of missing peaks in model corpus.") - parser.add_argument("queries_filename", type=str, help="Path to query spectra.") - parser.add_argument("queries_format", type=str, help="Query spectra file format.") - parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') - parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.") - parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.") - parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.") - parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .json scores.") - args = parser.parse_args() - - if args.queries_format == 'msp': - queries_spectra = list(load_from_msp(args.queries_filename)) - elif args.queries_format == 'mgf': - queries_spectra = list(load_from_mgf(args.queries_filename)) - else: - raise ValueError(f'File format {args.queries_format} not supported for query spectra.') - - if args.symmetric: - reference_spectra = queries_spectra.copy() - else: - if args.references_format == 'msp': - reference_spectra = list(load_from_msp(args.references_filename)) - elif args.references_format == 'mgf': - reference_spectra = list(load_from_mgf(args.references_filename)) - else: - raise ValueError(f'File format {args.references_format} not supported for reference spectra library.') - - if args.similarity_metric == 'CosineGreedy': - similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power) - elif args.similarity_metric == 'CosineHungarian': - similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power) - elif args.similarity_metric == 'ModifiedCosine': - similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power) - reference_spectra = list(map(convert_precursor_mz, reference_spectra)) - queries_spectra = list(map(convert_precursor_mz, queries_spectra)) - elif args.similarity_metric == 'NeutralLossesCosine': - similarity_metric = NeutralLossesCosine(args.tolerance, args.mz_power, args.intensity_power) - reference_spectra = list(map(convert_precursor_mz, reference_spectra)) - queries_spectra = list(map(convert_precursor_mz, queries_spectra)) - elif args.similarity_metric == 'Spec2Vec': - model = load_model(args.spec2vec_model, args.spec2vec_weights) - similarity_metric = Spec2Vec(model, intensity_weighting_power=args.intensity_power, allowed_missing_percentage=args.allowed_missing_percentage) - else: - return -1 - - print("Calculating scores...") - scores = calculate_scores( - references=reference_spectra, - queries=queries_spectra, - array_type=args.array_type, - similarity_function=similarity_metric, - is_symmetric=args.symmetric - ) - - if args.ri_tolerance is not None: - print("RI filtering with tolerance ", args.ri_tolerance) - ri_matches = calculate_scores(references=reference_spectra, - queries=queries_spectra, - similarity_function=MetadataMatch("retention_index", "difference", args.ri_tolerance), - array_type="numpy", - is_symmetric=args.symmetric).scores - scores.scores.add_coo_matrix(ri_matches, "MetadataMatch", join_type="inner") - - write_outputs(args, scores) - return 0 - - -def write_outputs(args, scores): - """Write Scores to json file.""" - print("Storing outputs...") - scores.to_json(args.output_filename_scores) - - -if __name__ == "__main__": - main(argv=sys.argv[1:]) - pass From 1a1b590b49153ab59834b2677a19638e25914180 Mon Sep 17 00:00:00 2001 From: hechth Date: Wed, 18 Oct 2023 16:08:40 +0200 Subject: [PATCH 2/4] updated matchms version --- tools/matchms/macros.xml | 2 +- tools/matchms/matchms_add_key.xml | 2 +- tools/matchms/matchms_convert.xml | 2 +- tools/matchms/matchms_filtering.xml | 2 +- tools/matchms/matchms_fingerprint_similarity.xml | 2 +- tools/matchms/matchms_formatter.xml | 2 +- tools/matchms/matchms_metadata_export.xml | 2 +- tools/matchms/matchms_metadata_match.xml | 2 +- tools/matchms/matchms_networking.xml | 2 +- tools/matchms/matchms_spectral_similarity.xml | 2 +- tools/matchms/matchms_split.xml | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/matchms/macros.xml b/tools/matchms/macros.xml index 83a1a8c5..d9537e1f 100644 --- a/tools/matchms/macros.xml +++ b/tools/matchms/macros.xml @@ -1,5 +1,5 @@ - 0.22.0 + 0.23.1 diff --git a/tools/matchms/matchms_add_key.xml b/tools/matchms/matchms_add_key.xml index 4242a423..33851be0 100644 --- a/tools/matchms/matchms_add_key.xml +++ b/tools/matchms/matchms_add_key.xml @@ -1,4 +1,4 @@ - + Set metadata key in MSP to static value diff --git a/tools/matchms/matchms_convert.xml b/tools/matchms/matchms_convert.xml index 74624488..0a163715 100644 --- a/tools/matchms/matchms_convert.xml +++ b/tools/matchms/matchms_convert.xml @@ -1,4 +1,4 @@ - + convert between mass spectral library formats (.mgf/.msp/.json) using matchms diff --git a/tools/matchms/matchms_filtering.xml b/tools/matchms/matchms_filtering.xml index 89cf71a9..64037f25 100644 --- a/tools/matchms/matchms_filtering.xml +++ b/tools/matchms/matchms_filtering.xml @@ -1,4 +1,4 @@ - + filter and normalize mass spectrometry data diff --git a/tools/matchms/matchms_fingerprint_similarity.xml b/tools/matchms/matchms_fingerprint_similarity.xml index 4f5718b2..df9aeeb0 100644 --- a/tools/matchms/matchms_fingerprint_similarity.xml +++ b/tools/matchms/matchms_fingerprint_similarity.xml @@ -1,4 +1,4 @@ - + calculate similarity between molecular fingerprints calculated from structural spectrum metadata descriptors diff --git a/tools/matchms/matchms_formatter.xml b/tools/matchms/matchms_formatter.xml index 23473178..bae8ab25 100644 --- a/tools/matchms/matchms_formatter.xml +++ b/tools/matchms/matchms_formatter.xml @@ -1,4 +1,4 @@ - + reformat scores object of matchms to long format table diff --git a/tools/matchms/matchms_metadata_export.xml b/tools/matchms/matchms_metadata_export.xml index 79079e8c..feba2226 100644 --- a/tools/matchms/matchms_metadata_export.xml +++ b/tools/matchms/matchms_metadata_export.xml @@ -1,4 +1,4 @@ - + extract all metadata from mass spectra file to tabular format macros.xml diff --git a/tools/matchms/matchms_metadata_match.xml b/tools/matchms/matchms_metadata_match.xml index 83522078..553d174e 100644 --- a/tools/matchms/matchms_metadata_match.xml +++ b/tools/matchms/matchms_metadata_match.xml @@ -1,4 +1,4 @@ - + matchms metadata match calculation for numeric fields based on tolerance diff --git a/tools/matchms/matchms_networking.xml b/tools/matchms/matchms_networking.xml index 16b25843..b8c3b2d7 100644 --- a/tools/matchms/matchms_networking.xml +++ b/tools/matchms/matchms_networking.xml @@ -1,4 +1,4 @@ - + create similarity network graph from matchms similarity scores diff --git a/tools/matchms/matchms_spectral_similarity.xml b/tools/matchms/matchms_spectral_similarity.xml index 7b6c064c..d0d9c82e 100644 --- a/tools/matchms/matchms_spectral_similarity.xml +++ b/tools/matchms/matchms_spectral_similarity.xml @@ -1,4 +1,4 @@ - + matchms spectral similarity calculation diff --git a/tools/matchms/matchms_split.xml b/tools/matchms/matchms_split.xml index 7a26a8a5..5e9e4a2b 100644 --- a/tools/matchms/matchms_split.xml +++ b/tools/matchms/matchms_split.xml @@ -1,4 +1,4 @@ - + split a large library into subsets macros.xml From 6209a03b34f8c43977ba29da159b151325e03422 Mon Sep 17 00:00:00 2001 From: hechth Date: Wed, 18 Oct 2023 16:09:00 +0200 Subject: [PATCH 3/4] added filtering option for matchms & spec2vec --- tools/matchms/matchms_spectral_similarity.xml | 4 ++++ tools/spec2vec/spec2vec_similarity.xml | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/matchms/matchms_spectral_similarity.xml b/tools/matchms/matchms_spectral_similarity.xml index d0d9c82e..51075c94 100644 --- a/tools/matchms/matchms_spectral_similarity.xml +++ b/tools/matchms/matchms_spectral_similarity.xml @@ -53,7 +53,9 @@ scores._scores = similarity.matrix( scores._scores.data.dtype.names = [name+"_scores", name+"_matches"] #end if +#if $filter_zero == "TRUE" scores.filter_by_range(name=name+"_matches", low=0) +#end if scores.to_json("$similarity_scores") @@ -64,6 +66,8 @@ scores.to_json("$similarity_scores") + diff --git a/tools/spec2vec/spec2vec_similarity.xml b/tools/spec2vec/spec2vec_similarity.xml index 16aa3cd9..f4255337 100644 --- a/tools/spec2vec/spec2vec_similarity.xml +++ b/tools/spec2vec/spec2vec_similarity.xml @@ -1,4 +1,4 @@ - + macros.xml @@ -39,8 +39,10 @@ layer = similarity.sparse_array( is_symmetric=False) scores._scores.add_sparse_data(scores._scores.row, scores._scores.col, layer, name) - +#if $filter_zero == "TRUE" scores.filter_by_range(inplace=True, name=name, low=0) +#end if + scores.to_json("$similarity_scores") @@ -56,6 +58,8 @@ scores.to_json("$similarity_scores") The default is 0, which means that no weighing will be done."/> + From b8ea8d1d84e36a80181d3073924285dde5f15186 Mon Sep 17 00:00:00 2001 From: hechth Date: Thu, 19 Oct 2023 12:49:50 +0200 Subject: [PATCH 4/4] Removed filtering option as it can't be disabled at the moment --- tools/matchms/matchms_spectral_similarity.xml | 4 ---- tools/spec2vec/spec2vec_similarity.xml | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tools/matchms/matchms_spectral_similarity.xml b/tools/matchms/matchms_spectral_similarity.xml index 51075c94..d0d9c82e 100644 --- a/tools/matchms/matchms_spectral_similarity.xml +++ b/tools/matchms/matchms_spectral_similarity.xml @@ -53,9 +53,7 @@ scores._scores = similarity.matrix( scores._scores.data.dtype.names = [name+"_scores", name+"_matches"] #end if -#if $filter_zero == "TRUE" scores.filter_by_range(name=name+"_matches", low=0) -#end if scores.to_json("$similarity_scores") @@ -66,8 +64,6 @@ scores.to_json("$similarity_scores") - diff --git a/tools/spec2vec/spec2vec_similarity.xml b/tools/spec2vec/spec2vec_similarity.xml index f4255337..802a6e27 100644 --- a/tools/spec2vec/spec2vec_similarity.xml +++ b/tools/spec2vec/spec2vec_similarity.xml @@ -39,10 +39,8 @@ layer = similarity.sparse_array( is_symmetric=False) scores._scores.add_sparse_data(scores._scores.row, scores._scores.col, layer, name) -#if $filter_zero == "TRUE" -scores.filter_by_range(inplace=True, name=name, low=0) -#end if +scores.filter_by_range(inplace=True, name=name, low=0) scores.to_json("$similarity_scores") @@ -58,8 +56,6 @@ scores.to_json("$similarity_scores") The default is 0, which means that no weighing will be done."/> -