From 5d7a0713fdf52a8381cc011bdfe879f62933530f Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Thu, 26 Oct 2023 15:53:15 +0200 Subject: [PATCH 1/5] added matchms_metadata_merge tool --- tools/matchms/matchms_metadata_merge.xml | 91 ++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tools/matchms/matchms_metadata_merge.xml diff --git a/tools/matchms/matchms_metadata_merge.xml b/tools/matchms/matchms_metadata_merge.xml new file mode 100644 index 00000000..94703d27 --- /dev/null +++ b/tools/matchms/matchms_metadata_merge.xml @@ -0,0 +1,91 @@ + + Merge metadata csv into MSP by a specified column + + + macros.xml + help.xml + + + + + + operation_2409 + + + + + matchms + + + + + + +import pandas +import matchms +import numpy as np + +matchms.set_matchms_logger_level('ERROR') + +spectra = list(matchms.importing.load_from_msp('${spectral_library}', False)) + +metadata_table = pandas.read_csv('${metadata_table_file}', dtype=object) +metadata_table.columns = map(str.lower, metadata_table.columns) + +metadata_table.drop_duplicates(subset='${user_specified_column}'.lower(), inplace=True) + +spectra_metadata= pandas.DataFrame.from_dict([x.metadata for x in spectra]) +spectra_metadata.dropna(axis=1, inplace=True) + +merged = metadata_table.merge(spectra_metadata, on='${user_specified_column}'.lower(), how='right') + +spectra_arr = np.asarray(spectra, dtype=object) + +def update_metadata(spectrum: matchms.Spectrum, row): + metadata = spectrum.metadata + metadata.update(row) + spectrum.metadata = metadata + return spectrum + +vec_update_metadata = np.vectorize(update_metadata) +merged_array = vec_update_metadata(spectra_arr, merged.to_dict(orient='records')) + +matchms.exporting.save_as_msp(merged_array.tolist(), '${output}') + + + + + + + + + + + + + + + + + + + + + + + + + + **Description** + The tool takes an msp file and a metadata csv file and merges the metadata in the csv + file with the metadata in the MSP file on a user specified column. + + + + https://doi.org/10.5281/zenodo.8083373 + + From dbc8501ed12ca0271832ddababee2789ae734a93 Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Thu, 26 Oct 2023 15:54:19 +0200 Subject: [PATCH 2/5] added testdata for matchms_metadata_merge tool --- .../test-data/metadata_merge/input.msp | 35 ++++++++++++++++ .../test-data/metadata_merge/metadata.csv | 4 ++ .../test-data/metadata_merge/output.msp | 41 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 tools/matchms/test-data/metadata_merge/input.msp create mode 100644 tools/matchms/test-data/metadata_merge/metadata.csv create mode 100644 tools/matchms/test-data/metadata_merge/output.msp diff --git a/tools/matchms/test-data/metadata_merge/input.msp b/tools/matchms/test-data/metadata_merge/input.msp new file mode 100644 index 00000000..6e0a800f --- /dev/null +++ b/tools/matchms/test-data/metadata_merge/input.msp @@ -0,0 +1,35 @@ +INCHI: InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20) +INSTRUMENTTYPE: LC-ESI-QQ +COLLISIONENERGY: 40 +FORMULA: C10H15N5O10P2 +COMPOUND_NAME: ADP +PRECURSORMZ: 428.31 +IONMODE: positive +NUM PEAKS: 2 +135.0 83.0 +136.0 999.0 + +INCHIKEY: BEJNERDRQOWKJM-UHFFFAOYSA-N +INCHI: InChI=1S/C6H6O4/c7-2-4-1-5(8)6(9)3-10-4/h1,3,7,9H,2H2 +INSTRUMENTTYPE: LC-ESI-ITFT +COLLISIONENERGY: 60 % (nominal) +FORMULA: C6H6O4 +COMPOUND_NAME: Kojic acid +PRECURSORTYPE: [M-H]- +PRECURSORMZ: 141.0193 +IONMODE: negative +NUM PEAKS: 1 +141.0194 999.0 + +INCHI: InChI=1S/C18H22N2/c1-19-12-14-20(15-13-19)18(16-8-4-2-5-9-16)17-10-6-3-7-11-17/h2-11,18H,12-15H2,1H3 +INSTRUMENTTYPE: LC-ESI-ITFT +COLLISIONENERGY: 85% (nominal) +FORMULA: C18H22N2 +COMPOUND_NAME: Cyclizine +PRECURSORTYPE: [M+H]+ +PRECURSORMZ: 267.1856 +IONMODE: positive +NUM PEAKS: 3 +99.0917 6.0 +165.0698 2.0 +167.0856 999.0 diff --git a/tools/matchms/test-data/metadata_merge/metadata.csv b/tools/matchms/test-data/metadata_merge/metadata.csv new file mode 100644 index 00000000..94308d1e --- /dev/null +++ b/tools/matchms/test-data/metadata_merge/metadata.csv @@ -0,0 +1,4 @@ +compound_name,inchikey,SMILES +ADP,XTWYTFMLZFPYCI-UHFFFAOYSA-N,C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)O)O)O +Kojic acid,BEJNERDRQOWKJM-UHFFFAOYSA-N,C1=C(OC=C(C1=O)O)CO +Cyclizine,,CN1CCN(CC1)C(C2=CC=CC=C2)C3=CC=CC=C3 \ No newline at end of file diff --git a/tools/matchms/test-data/metadata_merge/output.msp b/tools/matchms/test-data/metadata_merge/output.msp new file mode 100644 index 00000000..be95c026 --- /dev/null +++ b/tools/matchms/test-data/metadata_merge/output.msp @@ -0,0 +1,41 @@ +INCHI: InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20) +FORMULA: C10H15N5O10P2 +COMPOUND_NAME: ADP +IONMODE: positive +INSTRUMENT_TYPE: LC-ESI-QQ +COLLISION_ENERGY: 40 +PRECURSOR_MZ: 428.31 +INCHIKEY: XTWYTFMLZFPYCI-UHFFFAOYSA-N +SMILES: C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)O)O)O +NUM PEAKS: 2 +135.0 83.0 +136.0 999.0 + +INCHIKEY: BEJNERDRQOWKJM-UHFFFAOYSA-N +INCHI: InChI=1S/C6H6O4/c7-2-4-1-5(8)6(9)3-10-4/h1,3,7,9H,2H2 +FORMULA: C6H6O4 +COMPOUND_NAME: Kojic acid +IONMODE: negative +INSTRUMENT_TYPE: LC-ESI-ITFT +COLLISION_ENERGY: 60 % (nominal) +ADDUCT: [M-H]- +PRECURSOR_MZ: 141.0193 +SMILES: C1=C(OC=C(C1=O)O)CO +NUM PEAKS: 1 +141.0194 999.0 + +INCHI: InChI=1S/C18H22N2/c1-19-12-14-20(15-13-19)18(16-8-4-2-5-9-16)17-10-6-3-7-11-17/h2-11,18H,12-15H2,1H3 +FORMULA: C18H22N2 +COMPOUND_NAME: Cyclizine +IONMODE: positive +INSTRUMENT_TYPE: LC-ESI-ITFT +COLLISION_ENERGY: 85% (nominal) +ADDUCT: [M+H]+ +PRECURSOR_MZ: 267.1856 +INCHIKEY: nan +SMILES: CN1CCN(CC1)C(C2=CC=CC=C2)C3=CC=CC=C3 +NUM PEAKS: 3 +99.0917 6.0 +165.0698 2.0 +167.0856 999.0 + From ae863913f366a97752b3a3b9e212dccea34b772e Mon Sep 17 00:00:00 2001 From: Zargham Ahmad <46793118+zargham-ahmad@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:09:54 +0200 Subject: [PATCH 3/5] Update tools/matchms/matchms_metadata_merge.xml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Björn Grüning --- tools/matchms/matchms_metadata_merge.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/matchms/matchms_metadata_merge.xml b/tools/matchms/matchms_metadata_merge.xml index 94703d27..5a22a92b 100644 --- a/tools/matchms/matchms_metadata_merge.xml +++ b/tools/matchms/matchms_metadata_merge.xml @@ -18,7 +18,7 @@ From 53af14e2d8997a8cd23dff5db12b8bcd2b42d1d6 Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Wed, 22 Nov 2023 21:06:12 +0100 Subject: [PATCH 4/5] updated matchms_metadata_merge tool and testdata --- tools/matchms/matchms_metadata_merge.xml | 1 + .../test-data/metadata_merge/input.msp | 6 ++-- .../test-data/metadata_merge/metadata.csv | 2 +- .../test-data/metadata_merge/output.msp | 28 +++++++++---------- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/tools/matchms/matchms_metadata_merge.xml b/tools/matchms/matchms_metadata_merge.xml index 5a22a92b..c5fea2bf 100644 --- a/tools/matchms/matchms_metadata_merge.xml +++ b/tools/matchms/matchms_metadata_merge.xml @@ -28,6 +28,7 @@ import matchms import numpy as np matchms.set_matchms_logger_level('ERROR') +matchms.Metadata.set_key_replacements({}) spectra = list(matchms.importing.load_from_msp('${spectral_library}', False)) diff --git a/tools/matchms/test-data/metadata_merge/input.msp b/tools/matchms/test-data/metadata_merge/input.msp index 6e0a800f..396d8899 100644 --- a/tools/matchms/test-data/metadata_merge/input.msp +++ b/tools/matchms/test-data/metadata_merge/input.msp @@ -2,7 +2,7 @@ INCHI: InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10 INSTRUMENTTYPE: LC-ESI-QQ COLLISIONENERGY: 40 FORMULA: C10H15N5O10P2 -COMPOUND_NAME: ADP +NAME: ADP PRECURSORMZ: 428.31 IONMODE: positive NUM PEAKS: 2 @@ -14,7 +14,7 @@ INCHI: InChI=1S/C6H6O4/c7-2-4-1-5(8)6(9)3-10-4/h1,3,7,9H,2H2 INSTRUMENTTYPE: LC-ESI-ITFT COLLISIONENERGY: 60 % (nominal) FORMULA: C6H6O4 -COMPOUND_NAME: Kojic acid +NAME: Kojic acid PRECURSORTYPE: [M-H]- PRECURSORMZ: 141.0193 IONMODE: negative @@ -25,7 +25,7 @@ INCHI: InChI=1S/C18H22N2/c1-19-12-14-20(15-13-19)18(16-8-4-2-5-9-16)17-10-6-3-7- INSTRUMENTTYPE: LC-ESI-ITFT COLLISIONENERGY: 85% (nominal) FORMULA: C18H22N2 -COMPOUND_NAME: Cyclizine +NAME: Cyclizine PRECURSORTYPE: [M+H]+ PRECURSORMZ: 267.1856 IONMODE: positive diff --git a/tools/matchms/test-data/metadata_merge/metadata.csv b/tools/matchms/test-data/metadata_merge/metadata.csv index 94308d1e..0155e2b0 100644 --- a/tools/matchms/test-data/metadata_merge/metadata.csv +++ b/tools/matchms/test-data/metadata_merge/metadata.csv @@ -1,4 +1,4 @@ -compound_name,inchikey,SMILES +Name,inchikey,SMILES ADP,XTWYTFMLZFPYCI-UHFFFAOYSA-N,C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)O)O)O Kojic acid,BEJNERDRQOWKJM-UHFFFAOYSA-N,C1=C(OC=C(C1=O)O)CO Cyclizine,,CN1CCN(CC1)C(C2=CC=CC=C2)C3=CC=CC=C3 \ No newline at end of file diff --git a/tools/matchms/test-data/metadata_merge/output.msp b/tools/matchms/test-data/metadata_merge/output.msp index be95c026..6b6aa7da 100644 --- a/tools/matchms/test-data/metadata_merge/output.msp +++ b/tools/matchms/test-data/metadata_merge/output.msp @@ -1,10 +1,10 @@ INCHI: InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20) +INSTRUMENTTYPE: LC-ESI-QQ +COLLISIONENERGY: 40 FORMULA: C10H15N5O10P2 -COMPOUND_NAME: ADP +NAME: ADP +PRECURSORMZ: 428.31 IONMODE: positive -INSTRUMENT_TYPE: LC-ESI-QQ -COLLISION_ENERGY: 40 -PRECURSOR_MZ: 428.31 INCHIKEY: XTWYTFMLZFPYCI-UHFFFAOYSA-N SMILES: C1=NC2=C(C(=N1)N)N=CN2C3C(C(C(O3)COP(=O)(O)OP(=O)(O)O)O)O NUM PEAKS: 2 @@ -13,25 +13,25 @@ NUM PEAKS: 2 INCHIKEY: BEJNERDRQOWKJM-UHFFFAOYSA-N INCHI: InChI=1S/C6H6O4/c7-2-4-1-5(8)6(9)3-10-4/h1,3,7,9H,2H2 +INSTRUMENTTYPE: LC-ESI-ITFT +COLLISIONENERGY: 60 % (nominal) FORMULA: C6H6O4 -COMPOUND_NAME: Kojic acid +NAME: Kojic acid +PRECURSORTYPE: [M-H]- +PRECURSORMZ: 141.0193 IONMODE: negative -INSTRUMENT_TYPE: LC-ESI-ITFT -COLLISION_ENERGY: 60 % (nominal) -ADDUCT: [M-H]- -PRECURSOR_MZ: 141.0193 SMILES: C1=C(OC=C(C1=O)O)CO NUM PEAKS: 1 141.0194 999.0 INCHI: InChI=1S/C18H22N2/c1-19-12-14-20(15-13-19)18(16-8-4-2-5-9-16)17-10-6-3-7-11-17/h2-11,18H,12-15H2,1H3 +INSTRUMENTTYPE: LC-ESI-ITFT +COLLISIONENERGY: 85% (nominal) FORMULA: C18H22N2 -COMPOUND_NAME: Cyclizine +NAME: Cyclizine +PRECURSORTYPE: [M+H]+ +PRECURSORMZ: 267.1856 IONMODE: positive -INSTRUMENT_TYPE: LC-ESI-ITFT -COLLISION_ENERGY: 85% (nominal) -ADDUCT: [M+H]+ -PRECURSOR_MZ: 267.1856 INCHIKEY: nan SMILES: CN1CCN(CC1)C(C2=CC=CC=C2)C3=CC=CC=C3 NUM PEAKS: 3 From a89c2d0c9aacccf868329f135fb7a98b8d411e1b Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Wed, 22 Nov 2023 21:26:15 +0100 Subject: [PATCH 5/5] updated test --- tools/matchms/matchms_metadata_merge.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/matchms/matchms_metadata_merge.xml b/tools/matchms/matchms_metadata_merge.xml index c5fea2bf..ec35206f 100644 --- a/tools/matchms/matchms_metadata_merge.xml +++ b/tools/matchms/matchms_metadata_merge.xml @@ -75,7 +75,7 @@ matchms.exporting.save_as_msp(merged_array.tolist(), '${output}') - +