From 4be7601fa6c89054ff1fc5eb2c27375db8dff9e9 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Fri, 28 Jun 2024 19:27:18 +0300 Subject: [PATCH 1/2] #1188 HELM ver 2.04 support multiple sequences and connection tables (import and export) Add HELM 2.04 base support. Add UT. --- api/c/indigo/src/indigo_molecule.cpp | 2 +- api/c/indigo/src/indigo_savers.cpp | 2 +- .../ref/formats/helm_to_ket.py.out | 4 + .../ref/formats/ket_to_helm.py.out | 3 + .../integration/tests/formats/helm_to_ket.py | 71 + .../integration/tests/formats/ket_to_helm.py | 73 + .../tests/formats/ref/helm_annotations.ket | 1289 +++++++++ .../tests/formats/ref/helm_chem_peptide.ket | 2342 +++++++++++++++++ .../tests/formats/ref/helm_multi_char_rna.ket | 1633 ++++++++++++ .../tests/formats/ref/helm_peptide.ket | 535 ++++ .../tests/formats/ref/helm_simple_rna.ket | 1603 +++++++++++ core/indigo-core/molecule/monomer_commons.h | 17 + core/indigo-core/molecule/monomers_lib.h | 4 +- core/indigo-core/molecule/sequence_loader.h | 6 + core/indigo-core/molecule/sequence_saver.h | 6 +- .../molecule/src/monomer_commons.cpp | 23 + .../molecule/src/sequence_loader.cpp | 437 +++ .../molecule/src/sequence_saver.cpp | 489 +++- 18 files changed, 8403 insertions(+), 136 deletions(-) create mode 100644 api/tests/integration/ref/formats/helm_to_ket.py.out create mode 100644 api/tests/integration/ref/formats/ket_to_helm.py.out create mode 100644 api/tests/integration/tests/formats/helm_to_ket.py create mode 100644 api/tests/integration/tests/formats/ket_to_helm.py create mode 100644 api/tests/integration/tests/formats/ref/helm_annotations.ket create mode 100644 api/tests/integration/tests/formats/ref/helm_chem_peptide.ket create mode 100644 api/tests/integration/tests/formats/ref/helm_multi_char_rna.ket create mode 100644 api/tests/integration/tests/formats/ref/helm_peptide.ket create mode 100644 api/tests/integration/tests/formats/ref/helm_simple_rna.ket diff 
--git a/api/c/indigo/src/indigo_molecule.cpp b/api/c/indigo/src/indigo_molecule.cpp index d0b5d44e06..d55f3c2082 100644 --- a/api/c/indigo/src/indigo_molecule.cpp +++ b/api/c/indigo/src/indigo_molecule.cpp @@ -686,7 +686,7 @@ CEXPORT int indigoLoadHelm(int source) std::unique_ptr molptr = std::make_unique(); Molecule& mol = molptr->mol; - // loader.loadHelm(mol); + loader.loadHELM(mol); return self.addObject(molptr.release()); } INDIGO_END(-1); diff --git a/api/c/indigo/src/indigo_savers.cpp b/api/c/indigo/src/indigo_savers.cpp index 1b4c4d4c66..06abce6cd4 100644 --- a/api/c/indigo/src/indigo_savers.cpp +++ b/api/c/indigo/src/indigo_savers.cpp @@ -649,7 +649,7 @@ CEXPORT int indigoSaveHelm(int item, int output) { SequenceSaver saver(out); BaseMolecule& mol = obj.getBaseMolecule(); - // saver.saveMolecule(mol, SequenceSaver::SeqFormat::HELM); + saver.saveMolecule(mol, SequenceSaver::SeqFormat::HELM); out.flush(); return 1; } diff --git a/api/tests/integration/ref/formats/helm_to_ket.py.out b/api/tests/integration/ref/formats/helm_to_ket.py.out new file mode 100644 index 0000000000..a449526b5b --- /dev/null +++ b/api/tests/integration/ref/formats/helm_to_ket.py.out @@ -0,0 +1,4 @@ +*** HELM to KET *** +helm_multi_char_rna.ket:SUCCEED +helm_simple_rna.ket:SUCCEED +Test 'PEPTIDE1{A'2'}$$$$V2.0': got expected error 'Repeating do not supported now.' 
diff --git a/api/tests/integration/ref/formats/ket_to_helm.py.out b/api/tests/integration/ref/formats/ket_to_helm.py.out new file mode 100644 index 0000000000..f7f24ed0b7 --- /dev/null +++ b/api/tests/integration/ref/formats/ket_to_helm.py.out @@ -0,0 +1,3 @@ +*** KET to HELM *** +helm_multi_char_rna.ket:SUCCEED +helm_simple_rna.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/helm_to_ket.py b/api/tests/integration/tests/formats/helm_to_ket.py new file mode 100644 index 0000000000..a82368519f --- /dev/null +++ b/api/tests/integration/tests/formats/helm_to_ket.py @@ -0,0 +1,71 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import ( # noqa + Indigo, + IndigoException, + getIndigoExceptionText, + joinPathPy, +) + +indigo = Indigo() +indigo.setOption("json-saving-pretty", True) +indigo.setOption("ignore-stereochemistry-errors", True) + +print("*** HELM to KET ***") + +root = joinPathPy("molecules/", __file__) +ref_path = joinPathPy("ref/", __file__) + +helm_data = { + "helm_simple_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$V2.0", + "helm_multi_char_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R([daA])}$$$$V2.0", + "helm_peptide": "PEPTIDE1{A.[meA].C}$$$$V2.0", + "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R0$$$V2.0", + "helm_annotations": 'BLOB1{BEAD}"Animated Polystyrene"|CHEM1{[hxy]"Annotation"}|RNA1{R(A"mutation")P.R(U)P.R(G)P}$$$$V2.0', +} + +lib = indigo.loadMoleculeFromFile( + os.path.join(ref_path, "monomer_library.ket") +) + +for filename in sorted(helm_data.keys()): + mol = indigo.loadHelm(helm_data[filename]) + with open(os.path.join(ref_path, filename) + ".ket", "w") as file: + file.write(mol.json()) + with open(os.path.join(ref_path, filename) + ".ket", "r") as 
file: + ket_ref = file.read() + ket = mol.json() + diff = find_diff(ket_ref, ket) + if not diff: + print(filename + ".ket:SUCCEED") + else: + print(filename + ".ket:FAILED") + print(diff) + +helm_errors = {"PEPTIDE1{A'2'}$$$$V2.0": "Repeating do not supported now."} +for helm_seq in sorted(helm_errors.keys()): + error = helm_errors[helm_seq] + try: + mol = indigo.loadHelm(helm_seq) + print("Test %s failed: exception expected." % helm_seq) + except IndigoException as e: + text = getIndigoExceptionText(e) + if error in text: + print("Test '%s': got expected error '%s'" % (helm_seq, error)) + else: + print( + "Test '%s': expected error '%s' but got '%s'" + % (helm_seq, error, text) + ) diff --git a/api/tests/integration/tests/formats/ket_to_helm.py b/api/tests/integration/tests/formats/ket_to_helm.py new file mode 100644 index 0000000000..673ceab390 --- /dev/null +++ b/api/tests/integration/tests/formats/ket_to_helm.py @@ -0,0 +1,73 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import ( # noqa + Indigo, + IndigoException, + getIndigoExceptionText, + joinPathPy, +) + +indigo = Indigo() +indigo.setOption("ignore-stereochemistry-errors", True) + +print("*** KET to HELM ***") + +root = joinPathPy("molecules/", __file__) +ref = joinPathPy("ref/", __file__) + +indigo.loadMoleculeFromFile(os.path.join(ref, "monomer_library.ket")) + +# same ref ket files used to check idt-to-ket and to check ket-to-idt +helm_data = { + "helm_simple_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$V2.0", + "helm_multi_char_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R([daA])}$$$$V2.0", + "helm_peptide": "PEPTIDE1{A.[meA].C}$$$$V2.0", + "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R0$$$V2.0", + "helm_annotations": 
"CHEM1{[hxy]}|RNA1{R(A)P.R(U)P.R(G)P}$$$$V2.0", +} + +for filename in sorted(helm_data.keys()): + mol = indigo.loadMoleculeFromFile(os.path.join(ref, filename + ".ket")) + idt = mol.helm() + idt_ref = helm_data[filename] + if idt_ref == idt: + print(filename + ".ket:SUCCEED") + else: + print( + "%s.idt FAILED : expected '%s', got '%s'" + % (filename, idt_ref, idt) + ) + +helm_errors = {} +for filename in sorted(helm_errors.keys()): + error = helm_errors[filename] + try: + mol = indigo.loadMoleculeFromFile( + os.path.join(root, filename + ".ket") + ) + idt = mol.idt() + print( + "Test %s failed: exception expected but got next idt - '%s'." + % (filename, idt) + ) + except IndigoException as e: + text = getIndigoExceptionText(e) + if error in text: + print("Test %s: got expected error '%s'" % (filename, error)) + else: + print( + "Test %s: expected error '%s' but got '%s'" + % (filename, error, text) + ) diff --git a/api/tests/integration/tests/formats/ref/helm_annotations.ket b/api/tests/integration/tests/formats/ref/helm_annotations.ket new file mode 100644 index 0000000000..9344787517 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_annotations.ket @@ -0,0 +1,1289 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "monomer6" + }, + { + "$ref": "monomer7" + }, + { + "$ref": "monomer8" + }, + { + "$ref": "monomer9" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer1", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer1", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + 
"monomerId": "monomer3", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer4", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer4", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer6", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer7", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer8", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer7", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer9", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-hxy___Hexynyl alcohol" + }, + { + "$ref": "monomerTemplate-R___Ribose" + }, + { + "$ref": "monomerTemplate-A___Adenine" + }, + { + "$ref": "monomerTemplate-P___Phosphate" + }, + { + "$ref": "monomerTemplate-U___Uracil" + }, + { + "$ref": "monomerTemplate-G___Guanine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "hxy", + "templateId": "hxy___Hexynyl alcohol" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 1, + "position": { + "x": 0.0, + "y": -3.200000047683716 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 2, + "position": { + "x": 0.0, + "y": -4.800000190734863 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomer3": { + "type": "monomer", + "id": 
"3", + "seqid": 3, + "position": { + "x": 1.600000023841858, + "y": -3.200000047683716 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer4": { + "type": "monomer", + "id": "4", + "seqid": 4, + "position": { + "x": 3.200000047683716, + "y": -3.200000047683716 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 5, + "position": { + "x": 3.200000047683716, + "y": -4.800000190734863 + }, + "alias": "U", + "templateId": "U___Uracil" + }, + "monomer6": { + "type": "monomer", + "id": "6", + "seqid": 6, + "position": { + "x": 4.800000190734863, + "y": -3.200000047683716 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer7": { + "type": "monomer", + "id": "7", + "seqid": 7, + "position": { + "x": 6.400000095367432, + "y": -3.200000047683716 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer8": { + "type": "monomer", + "id": "8", + "seqid": 8, + "position": { + "x": 6.400000095367432, + "y": -4.800000190734863 + }, + "alias": "G", + "templateId": "G___Guanine" + }, + "monomer9": { + "type": "monomer", + "id": "9", + "seqid": 9, + "position": { + "x": 8.0, + "y": -3.200000047683716 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomerTemplate-hxy___Hexynyl alcohol": { + "type": "monomerTemplate", + "id": "hxy___Hexynyl alcohol", + "class": "Chem", + "classHELM": "CHEM", + "alias": "hxy", + "name": "hxy", + "fullName": "Hexynyl alcohol", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 2 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + 1.4606000185012818, + 0.15029999613761903, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.2118000984191896, + -1.1490000486373902, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 0.2606000006198883, + 0.15029999613761903, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.712599992752075, + -1.1490999460220338, + 0.0 + ] + }, + { + "label": "C", + 
"location": [ + 4.463799953460693, + -2.4484000205993654, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 5.964700222015381, + -2.4484000205993654, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 6.715799808502197, + -3.7476999759674074, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 7.316500186920166, + -4.786600112915039, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 3, + "atoms": [ + 6, + 7 + ] + } + ] + }, + "monomerTemplate-R___Ribose": { + "type": "monomerTemplate", + "id": "R___Ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "R", + "name": "Rib", + "fullName": "Ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -1.101699948310852, + -1.0663000345230103, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.5896999835968018, + 0.34360000491142275, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.08089999854564667, + -1.9888999462127686, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.909500002861023, + 0.2924000024795532, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.3238999843597413, + -1.1492999792099, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 1.8285000324249268, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.4518001079559328, + -1.558899998664856, + 0.0 + ] + }, + { + 
"label": "C", + "location": [ + -1.430999994277954, + 1.583400011062622, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.039900001138448718, + -3.1881000995635988, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -2.9279000759124758, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.6017000675201418, + 2.468400001525879, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 3.017400026321411, + 1.3125, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": 
[ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": "monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "alias": "P", + "name": "P", + "fullName": "Phosphate", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.23989999294281007, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.4399000406265259, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + -1.0393999814987183, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.960099995136261, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + 1.0393999814987183, + 0.0 + ] + } + ], 
+ "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-U___Uracil": { + "type": "monomerTemplate", + "id": "U___Uracil", + "class": "Base", + "classHELM": "RNA", + "alias": "U", + "name": "Ura", + "fullName": "Uracil", + "naturalAnalogShort": "U", + "naturalAnalog": "Ura", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + } + ] + }, + "monomerTemplate-G___Guanine": { + "type": "monomerTemplate", + "id": "G___Guanine", + "class": "Base", + 
"classHELM": "RNA", + "alias": "G", + "name": "Gua", + "fullName": "Guanine", + "naturalAnalogShort": "G", + "naturalAnalog": "Gua", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 11 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.9527000188827515, + 3.3541998863220217, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 11 + ] + 
}, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket b/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket new file mode 100644 index 0000000000..24b9933141 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket @@ -0,0 +1,2342 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "monomer6" + }, + { + "$ref": "monomer7" + }, + { + "$ref": "monomer8" + }, + { + "$ref": "monomer9" + }, + { + "$ref": "monomer10" + }, + { + "$ref": "monomer11" + }, + { + "$ref": "monomer12" + }, + { + "$ref": "monomer13" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer1", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer4", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer6", + "attachmentPointId": "R1" + 
} + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer7", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer8", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer8", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer9", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer10", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer10", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer11", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer11", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer12", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer12", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer13", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-PEG2___Diethylene Glycol" + }, + { + "$ref": "monomerTemplate-W___Tryptophan" + }, + { + "$ref": "monomerTemplate-N___Asparagine" + }, + { + "$ref": "monomerTemplate-D___Aspartic acid" + }, + { + "$ref": "monomerTemplate-Pen___penicillamine (3-mercaptovaline)" + }, + { + "$ref": "monomerTemplate-G___Glycine" + }, + { + "$ref": "monomerTemplate-Orn___L-ornithine" + }, + { + "$ref": "monomerTemplate-A___Alanine" + }, + { + "$ref": "monomerTemplate-S___Serine" + }, + { + "$ref": "monomerTemplate-Cap___gamma-amino-beta-hydroxycyclohexanepentanoic acid" + } + ] + }, + "monomer0": { + 
"type": "monomer", + "id": "0", + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "PEG2", + "templateId": "PEG2___Diethylene Glycol" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 2, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "W", + "templateId": "W___Tryptophan" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 3, + "position": { + "x": 1.600000023841858, + "y": -1.600000023841858 + }, + "alias": "N", + "templateId": "N___Asparagine" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 4, + "position": { + "x": 3.200000047683716, + "y": -1.600000023841858 + }, + "alias": "D", + "templateId": "D___Aspartic acid" + }, + "monomer4": { + "type": "monomer", + "id": "4", + "seqid": 5, + "position": { + "x": 4.800000190734863, + "y": -1.600000023841858 + }, + "alias": "Pen", + "templateId": "Pen___penicillamine (3-mercaptovaline)" + }, + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 6, + "position": { + "x": 6.400000095367432, + "y": -1.600000023841858 + }, + "alias": "G", + "templateId": "G___Glycine" + }, + "monomer6": { + "type": "monomer", + "id": "6", + "seqid": 7, + "position": { + "x": 8.0, + "y": -1.600000023841858 + }, + "alias": "Orn", + "templateId": "Orn___L-ornithine" + }, + "monomer7": { + "type": "monomer", + "id": "7", + "seqid": 8, + "position": { + "x": 9.600000381469727, + "y": -1.600000023841858 + }, + "alias": "D", + "templateId": "D___Aspartic acid" + }, + "monomer8": { + "type": "monomer", + "id": "8", + "seqid": 9, + "position": { + "x": 11.199999809265137, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Alanine" + }, + "monomer9": { + "type": "monomer", + "id": "9", + "seqid": 10, + "position": { + "x": 12.800000190734864, + "y": -1.600000023841858 + }, + "alias": "D", + "templateId": "D___Aspartic acid" + }, + "monomer10": { + "type": "monomer", + "id": "10", + "seqid": 11, + "position": { + "x": 14.40000057220459, + "y": 
-1.600000023841858 + }, + "alias": "G", + "templateId": "G___Glycine" + }, + "monomer11": { + "type": "monomer", + "id": "11", + "seqid": 12, + "position": { + "x": 16.0, + "y": -1.600000023841858 + }, + "alias": "S", + "templateId": "S___Serine" + }, + "monomer12": { + "type": "monomer", + "id": "12", + "seqid": 13, + "position": { + "x": 17.600000381469728, + "y": -1.600000023841858 + }, + "alias": "G", + "templateId": "G___Glycine" + }, + "monomer13": { + "type": "monomer", + "id": "13", + "seqid": 14, + "position": { + "x": 19.200000762939454, + "y": -1.600000023841858 + }, + "alias": "Cap", + "templateId": "Cap___gamma-amino-beta-hydroxycyclohexanepentanoic acid" + }, + "monomerTemplate-PEG2___Diethylene Glycol": { + "type": "monomerTemplate", + "id": "PEG2___Diethylene Glycol", + "class": "Chem", + "classHELM": "CHEM", + "alias": "PEG2", + "name": "PEG2", + "fullName": "Diethylene Glycol", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 8 + ] + } + }, + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 7 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -3.899899959564209, + -0.3833000063896179, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.599900007247925, + 0.3666999936103821, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.2999999523162842, + -0.3833000063896179, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.0, + 0.3666999936103821, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.2999999523162842, + -0.3833000063896179, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.5999999046325685, + 0.3666999936103821, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.9000000953674318, + -0.3833000063896179, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 4.9394001960754398, + 0.21639999747276307, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -4.9394001960754398, + 0.21639999747276307, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ 
+ 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 8 + ] + } + ] + }, + "monomerTemplate-W___Tryptophan": { + "type": "monomerTemplate", + "id": "W___Tryptophan", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "W", + "name": "Trp", + "fullName": "Tryptophan", + "naturalAnalogShort": "W", + "naturalAnalog": "Trp", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 15 + ] + } + }, + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 16 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 2.0938000679016115, + -2.35509991645813, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.0952000617980959, + -3.299999952316284, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.0698000192642213, + -1.7652000188827515, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + 0.045899998396635059, + -2.35509991645813, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -0.7723000049591065, + -1.882599949836731, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.0679999589920045, + -0.5835000276565552, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.04580000042915344, + 0.005499999970197678, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.0319000482559205, + -0.4778999984264374, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8245999813079835, + 0.3977999985218048, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.2369999885559083, + 1.4215999841690064, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.08100000023841858, + 1.1792999505996705, + 0.0 + ] + }, + { + "label": "C", + 
"location": [ + 0.7067999839782715, + 2.0590999126434328, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.33869999647140505, + 3.1814000606536867, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.817300021648407, + 3.423799991607666, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.6051000356674195, + 2.543800115585327, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.911400079727173, + -1.8818000555038453, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.0060999393463136, + 0.388700008392334, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 10, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 10, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 10, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 11, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 15 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 16 + ] + } + ] + }, + "monomerTemplate-N___Asparagine": { + "type": "monomerTemplate", + "id": "N___Asparagine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "N", + "name": "Asn", + "fullName": "Asparagine", + "naturalAnalogShort": "N", + "naturalAnalog": "Asn", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 7, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + 
{ + "label": "C", + "location": [ + 1.892899990081787, + -1.4175000190734864, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.894700050354004, + -2.598900079727173, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.6126999855041504, + -0.6798999905586243, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -0.6675999760627747, + -1.4175000190734864, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.6907000541687012, + -0.8266000151634216, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.6104000210762024, + 0.7978000044822693, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.6697999835014343, + 1.5354000329971314, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.692199945449829, + 0.9434000253677368, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.6715999841690064, + 2.7167999744415285, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.915299892425537, + -0.8255000114440918, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.53410005569458, + 1.7724000215530396, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 2, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 10 + ] + } + ] + }, + "monomerTemplate-D___Aspartic acid": { + "type": "monomerTemplate", + "id": "D___Aspartic acid", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "D", + "name": "Asp", + "fullName": "Aspartic acid", + "naturalAnalogShort": "D", + "naturalAnalog": "Asp", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + } + }, + { + "attachmentAtom": 0, + 
"leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.63100004196167, + -1.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.632699966430664, + -2.7392001152038576, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.3506999909877777, + -0.8201000094413757, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -0.9294999837875366, + -1.557800054550171, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.9524999856948853, + -0.9668999910354614, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.34850001335144045, + 0.6575000286102295, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.9316999912261963, + 1.3952000141143799, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.954200029373169, + 0.8032000064849854, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9334999918937683, + 2.5766000747680666, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.65339994430542, + -0.9657999873161316, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 0.08510000258684159, + 3.175100088119507, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 10 + ] + } + ] + }, + "monomerTemplate-Pen___penicillamine (3-mercaptovaline)": { + "type": "monomerTemplate", + "id": "Pen___penicillamine (3-mercaptovaline)", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "Pen", + "name": "Pen", + "fullName": "penicillamine 
(3-mercaptovaline)", + "naturalAnalogShort": "V", + "naturalAnalog": "Val", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 8 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.3220000267028809, + -1.5015000104904175, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.023099999874830247, + -0.7494999766349793, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.0636999607086182, + 1.3503999710083008, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.023900000378489496, + 0.7512999773025513, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.025200000032782556, + 1.951300024986267, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.3213000297546387, + -2.7014999389648439, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.2757999897003174, + -1.5015000104904175, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.361799955368042, + -0.902400016784668, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.316200017929077, + -0.9034000039100647, + 0.0 + ] + }, + { + "label": "S", + "location": [ + -1.274899959564209, + 1.5032999515533448, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.2741999626159669, + 2.7032999992370607, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 5, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 3, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + } + ] + }, + "monomerTemplate-G___Glycine": { + "type": 
"monomerTemplate", + "id": "G___Glycine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "G", + "name": "Gly", + "fullName": "Glycine", + "naturalAnalogShort": "G", + "naturalAnalog": "Gly", + "attachmentPoints": [ + { + "attachmentAtom": 4, + "leavingGroup": { + "atoms": [ + 5 + ] + } + }, + { + "attachmentAtom": 1, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + -0.33629998564720156, + 0.534600019454956, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.992900013923645, + -0.11069999635219574, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.0781999826431275, + -1.2890000343322755, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.970900058746338, + 0.5519999861717224, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.3259999752044678, + -0.11069999635219574, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.379699945449829, + 0.423799991607666, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + } + ] + }, + "monomerTemplate-Orn___L-ornithine": { + "type": "monomerTemplate", + "id": "Orn___L-ornithine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "Orn", + "name": "Orn", + "fullName": "L-ornithine", + "naturalAnalogShort": "K", + "naturalAnalog": "Lys", + "attachmentPoints": [ + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 8 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 4, + "leavingGroup": { + "atoms": [ + 9 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8932000398635865, + -2.0181000232696535, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.5928000211715698, + -1.2688000202178956, + 0.0 + ], + "stereoLabel": "abs" + }, + { 
+ "label": "C", + "location": [ + 0.590499997138977, + 0.23199999332427979, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7098000049591065, + 0.9812999963760376, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.012500047683716, + 3.231300115585327, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.7075999975204468, + -2.0181000232696535, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.8949999809265137, + -3.218100070953369, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.9316999912261965, + -1.4168000221252442, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.7467000484466553, + -1.4178999662399293, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.014400005340576, + 4.431399822235107, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7121000289916992, + 2.482100009918213, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 6, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 10, + 4 + ] + } + ] + }, + "monomerTemplate-A___Alanine": { + "type": "monomerTemplate", + "id": "A___Alanine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "A", + "name": "Ala", + "fullName": "Alanine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ala", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + } + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 5 + ] + } + } + ], + "atoms": [ + { + "label": "N", + "location": [ + -1.2548999786376954, + -0.3919999897480011, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.2720000147819519, + 
0.26330000162124636, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.31029999256134035, + 1.739300012588501, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.052299976348877, + -0.3919999897480011, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.082900047302246, + -1.5721999406814576, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.0353000164031984, + 0.26330000162124636, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.333400011062622, + 0.09049999713897705, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + } + ] + }, + "monomerTemplate-S___Serine": { + "type": "monomerTemplate", + "id": "S___Serine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "S", + "name": "Ser", + "fullName": "Serine", + "naturalAnalogShort": "S", + "naturalAnalog": "Ser", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.3671000003814698, + -1.082900047302246, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.368899941444397, + -2.2643001079559328, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.0869000032544136, + -0.34520000219345095, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -1.1934000253677369, + -1.082900047302246, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.2165000438690187, + -0.492000013589859, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.08470000326633454, + 1.1324000358581544, + 0.0 + ] + }, + { + "label": 
"O", + "location": [ + -0.9391000270843506, + 1.7222000360488892, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.3896000385284426, + -0.4909000098705292, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -0.9480999708175659, + 2.903599977493286, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + } + ] + }, + "monomerTemplate-Cap___gamma-amino-beta-hydroxycyclohexanepentanoic acid": { + "type": "monomerTemplate", + "id": "Cap___gamma-amino-beta-hydroxycyclohexanepentanoic acid", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "Cap", + "name": "Cap", + "fullName": "gamma-amino-beta-hydroxycyclohexanepentanoic acid", + "naturalAnalogShort": "X", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + -0.2371000051498413, + -1.5700000524520875, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.6554999947547913, + -2.343100070953369, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.7711000442504883, + -1.9565999507904053, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.9943000078201295, + -0.7972000241279602, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.6637001037597658, + -2.7297000885009767, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.43230000138282778, + -3.5025999546051027, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.3528000116348267, + -1.9565999507904053, + 0.0 + ] + }, + { + "label": "H", + 
"location": [ + -2.2453999519348146, + -1.183500051498413, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.014000000432133675, + -0.4106000065803528, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.9065999984741211, + 0.36239999532699587, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.6833999752998352, + 1.5218000411987305, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5758999586105347, + 2.294800043106079, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.3526999950408936, + 3.454200029373169, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.2370000034570694, + 3.8408000469207765, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.6554999947547913, + 3.067699909210205, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.4323999881744385, + 1.9082000255584717, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 5 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 8 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 8, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 11, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 13 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 15 + ] + }, + { + "type": 1, + "atoms": [ + 10, + 15 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/helm_multi_char_rna.ket b/api/tests/integration/tests/formats/ref/helm_multi_char_rna.ket new file mode 100644 index 0000000000..359699648d --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_multi_char_rna.ket @@ -0,0 +1,1633 
@@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "monomer6" + }, + { + "$ref": "monomer7" + }, + { + "$ref": "monomer8" + }, + { + "$ref": "monomer9" + }, + { + "$ref": "monomer10" + }, + { + "$ref": "monomer11" + }, + { + "$ref": "monomer12" + }, + { + "$ref": "monomer13" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer6", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer8", + "attachmentPointId": 
"R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer8", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer9", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer10", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer11", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer11", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer12", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer12", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer13", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-R___Ribose" + }, + { + "$ref": "monomerTemplate-U___Uracil" + }, + { + "$ref": "monomerTemplate-P___Phosphate" + }, + { + "$ref": "monomerTemplate-T___Thymine" + }, + { + "$ref": "monomerTemplate-G___Guanine" + }, + { + "$ref": "monomerTemplate-C___Cytosine" + }, + { + "$ref": "monomerTemplate-daA___N,N-dimethyl-Adenine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 1, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 2, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "U", + "templateId": "U___Uracil" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 3, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 4, + "position": { + "x": 3.200000047683716, + "y": -0.0 + }, 
+ "alias": "R", + "templateId": "R___Ribose" + }, + "monomer4": { + "type": "monomer", + "id": "4", + "seqid": 5, + "position": { + "x": 3.200000047683716, + "y": -1.600000023841858 + }, + "alias": "T", + "templateId": "T___Thymine" + }, + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 6, + "position": { + "x": 4.800000190734863, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer6": { + "type": "monomer", + "id": "6", + "seqid": 7, + "position": { + "x": 6.400000095367432, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer7": { + "type": "monomer", + "id": "7", + "seqid": 8, + "position": { + "x": 6.400000095367432, + "y": -1.600000023841858 + }, + "alias": "G", + "templateId": "G___Guanine" + }, + "monomer8": { + "type": "monomer", + "id": "8", + "seqid": 9, + "position": { + "x": 8.0, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer9": { + "type": "monomer", + "id": "9", + "seqid": 10, + "position": { + "x": 9.600000381469727, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer10": { + "type": "monomer", + "id": "10", + "seqid": 11, + "position": { + "x": 9.600000381469727, + "y": -1.600000023841858 + }, + "alias": "C", + "templateId": "C___Cytosine" + }, + "monomer11": { + "type": "monomer", + "id": "11", + "seqid": 12, + "position": { + "x": 11.199999809265137, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer12": { + "type": "monomer", + "id": "12", + "seqid": 13, + "position": { + "x": 12.800000190734864, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer13": { + "type": "monomer", + "id": "13", + "seqid": 14, + "position": { + "x": 12.800000190734864, + "y": -1.600000023841858 + }, + "alias": "daA", + "templateId": "daA___N,N-dimethyl-Adenine" + }, + "monomerTemplate-R___Ribose": { + "type": "monomerTemplate", + "id": "R___Ribose", + "class": "Sugar", + "classHELM": "RNA", + 
"alias": "R", + "name": "Rib", + "fullName": "Ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -1.101699948310852, + -1.0663000345230103, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.5896999835968018, + 0.34360000491142275, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.08089999854564667, + -1.9888999462127686, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.909500002861023, + 0.2924000024795532, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.3238999843597413, + -1.1492999792099, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 1.8285000324249268, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.4518001079559328, + -1.558899998664856, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.430999994277954, + 1.583400011062622, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.039900001138448718, + -3.1881000995635988, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -2.9279000759124758, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.6017000675201418, + 2.468400001525879, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 3.017400026321411, + 1.3125, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { 
+ "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + } + ] + }, + "monomerTemplate-U___Uracil": { + "type": "monomerTemplate", + "id": "U___Uracil", + "class": "Base", + "classHELM": "RNA", + "alias": "U", + "name": "Ura", + "fullName": "Uracil", + "naturalAnalogShort": "U", + "naturalAnalog": "Ura", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": 
"monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "alias": "P", + "name": "P", + "fullName": "Phosphate", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.23989999294281007, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.4399000406265259, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + -1.0393999814987183, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.960099995136261, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + 1.0393999814987183, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-T___Thymine": { + "type": "monomerTemplate", + "id": "T___Thymine", + "class": "Base", + "classHELM": "RNA", + "alias": "T", + "name": "Thy", + "fullName": "Thymine", + "naturalAnalogShort": "T", + "naturalAnalog": "Thy", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 
3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.7116999626159669, + -0.9883999824523926, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 9 + ] + } + ] + }, + "monomerTemplate-G___Guanine": { + "type": "monomerTemplate", + "id": "G___Guanine", + "class": "Base", + "classHELM": "RNA", + "alias": "G", + "name": "Gua", + "fullName": "Guanine", + "naturalAnalogShort": "G", + "naturalAnalog": "Gua", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 11 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", 
+ "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.9527000188827515, + 3.3541998863220217, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + }, + "monomerTemplate-C___Cytosine": { + "type": "monomerTemplate", + "id": "C___Cytosine", + "class": "Base", + "classHELM": "RNA", + "alias": "C", + "name": "Cyt", + "fullName": "Cytosine", + "naturalAnalogShort": "C", + "naturalAnalog": "Cyt", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 
3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9883999824523926, + -0.9883000254631043, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + } + ] + }, + "monomerTemplate-daA___N,N-dimethyl-Adenine": { + "type": "monomerTemplate", + "id": "daA___N,N-dimethyl-Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "daA", + "name": "daA", + "fullName": "N,N-dimethyl-Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + 
"location": [ + 2.4637999534606935, + -0.21089999377727509, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.3543999195098879, + 0.5932999849319458, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.715399980545044, + -1.3841999769210816, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 12 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/helm_peptide.ket b/api/tests/integration/tests/formats/ref/helm_peptide.ket new file mode 100644 index 0000000000..447e6f47a4 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_peptide.ket @@ -0,0 +1,535 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer1", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-A___Alanine" + }, + { + "$ref": 
"monomerTemplate-meA___N-Methyl-Alanine" + }, + { + "$ref": "monomerTemplate-C___Cysteine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 1, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "A", + "templateId": "A___Alanine" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 2, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "meA", + "templateId": "meA___N-Methyl-Alanine" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 3, + "position": { + "x": 3.200000047683716, + "y": -0.0 + }, + "alias": "C", + "templateId": "C___Cysteine" + }, + "monomerTemplate-A___Alanine": { + "type": "monomerTemplate", + "id": "A___Alanine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "A", + "name": "Ala", + "fullName": "Alanine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ala", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + } + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 5 + ] + } + } + ], + "atoms": [ + { + "label": "N", + "location": [ + -1.2548999786376954, + -0.3919999897480011, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.2720000147819519, + 0.26330000162124636, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.31029999256134035, + 1.739300012588501, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.052299976348877, + -0.3919999897480011, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.082900047302246, + -1.5721999406814576, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.0353000164031984, + 0.26330000162124636, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.333400011062622, + 0.09049999713897705, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + 
"type": 1, + "atoms": [ + 3, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + } + ] + }, + "monomerTemplate-meA___N-Methyl-Alanine": { + "type": "monomerTemplate", + "id": "meA___N-Methyl-Alanine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "meA", + "name": "meA", + "fullName": "N-Methyl-Alanine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ala", + "attachmentPoints": [ + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + -0.00009999999747378752, + 1.6240999698638917, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.00009999999747378752, + 0.44279998540878298, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.0228999853134156, + -0.147599995136261, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.0457000732421877, + 0.4429999887943268, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.0230000019073487, + -1.3286000490188599, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.0228999853134156, + -0.147599995136261, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -2.045799970626831, + 0.44269999861717226, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.0226999521255494, + -1.3287999629974366, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 1, + 0 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 7 + ] + } + ] + }, + "monomerTemplate-C___Cysteine": { + "type": "monomerTemplate", + "id": "C___Cysteine", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "C", + "name": "Cys", + "fullName": "Cysteine", + "naturalAnalogShort": "C", + "naturalAnalog": 
"Cys", + "attachmentPoints": [ + { + "attachmentAtom": 4, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + } + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.4457000494003297, + -1.1332999467849732, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.1453000009059906, + -0.3840000033378601, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.14300000667572022, + 1.1167999505996705, + 0.0 + ] + }, + { + "label": "S", + "location": [ + -1.1572999954223633, + 1.8660999536514283, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.1550999879837037, + -1.1332999467849732, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.4474999904632569, + -2.3333001136779787, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.4842000007629396, + -0.5320000052452087, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.194200038909912, + -0.5331000089645386, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.15910005569458, + 3.0660998821258547, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 5, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/helm_simple_rna.ket b/api/tests/integration/tests/formats/ref/helm_simple_rna.ket new file mode 100644 index 0000000000..66a4c7fa1c --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_simple_rna.ket @@ -0,0 +1,1603 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + 
"$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "monomer4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "monomer6" + }, + { + "$ref": "monomer7" + }, + { + "$ref": "monomer8" + }, + { + "$ref": "monomer9" + }, + { + "$ref": "monomer10" + }, + { + "$ref": "monomer11" + }, + { + "$ref": "monomer12" + }, + { + "$ref": "monomer13" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer6", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer8", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer8", + 
"attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer9", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer10", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer11", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer11", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer12", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer12", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "monomer13", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-R___Ribose" + }, + { + "$ref": "monomerTemplate-U___Uracil" + }, + { + "$ref": "monomerTemplate-P___Phosphate" + }, + { + "$ref": "monomerTemplate-T___Thymine" + }, + { + "$ref": "monomerTemplate-G___Guanine" + }, + { + "$ref": "monomerTemplate-C___Cytosine" + }, + { + "$ref": "monomerTemplate-A___Adenine" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 1, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 2, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "U", + "templateId": "U___Uracil" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 3, + "position": { + "x": 1.600000023841858, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 4, + "position": { + "x": 3.200000047683716, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer4": { + "type": "monomer", + "id": "4", + 
"seqid": 5, + "position": { + "x": 3.200000047683716, + "y": -1.600000023841858 + }, + "alias": "T", + "templateId": "T___Thymine" + }, + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 6, + "position": { + "x": 4.800000190734863, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer6": { + "type": "monomer", + "id": "6", + "seqid": 7, + "position": { + "x": 6.400000095367432, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer7": { + "type": "monomer", + "id": "7", + "seqid": 8, + "position": { + "x": 6.400000095367432, + "y": -1.600000023841858 + }, + "alias": "G", + "templateId": "G___Guanine" + }, + "monomer8": { + "type": "monomer", + "id": "8", + "seqid": 9, + "position": { + "x": 8.0, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer9": { + "type": "monomer", + "id": "9", + "seqid": 10, + "position": { + "x": 9.600000381469727, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer10": { + "type": "monomer", + "id": "10", + "seqid": 11, + "position": { + "x": 9.600000381469727, + "y": -1.600000023841858 + }, + "alias": "C", + "templateId": "C___Cytosine" + }, + "monomer11": { + "type": "monomer", + "id": "11", + "seqid": 12, + "position": { + "x": 11.199999809265137, + "y": -0.0 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer12": { + "type": "monomer", + "id": "12", + "seqid": 13, + "position": { + "x": 12.800000190734864, + "y": -0.0 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "monomer13": { + "type": "monomer", + "id": "13", + "seqid": 14, + "position": { + "x": 12.800000190734864, + "y": -1.600000023841858 + }, + "alias": "A", + "templateId": "A___Adenine" + }, + "monomerTemplate-R___Ribose": { + "type": "monomerTemplate", + "id": "R___Ribose", + "class": "Sugar", + "classHELM": "RNA", + "alias": "R", + "name": "Rib", + "fullName": "Ribose", + "naturalAnalogShort": "R", + "naturalAnalog": "Rib", + 
"attachmentPoints": [ + { + "attachmentAtom": 9, + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -1.101699948310852, + -1.0663000345230103, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.5896999835968018, + 0.34360000491142275, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.08089999854564667, + -1.9888999462127686, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.909500002861023, + 0.2924000024795532, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.3238999843597413, + -1.1492999792099, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 1.8285000324249268, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.4518001079559328, + -1.558899998664856, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.430999994277954, + 1.583400011062622, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.039900001138448718, + -3.1881000995635988, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -2.9279000759124758, + 1.4754999876022339, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.6017000675201418, + 2.468400001525879, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 3.017400026321411, + 1.3125, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + 
"stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + } + ] + }, + "monomerTemplate-U___Uracil": { + "type": "monomerTemplate", + "id": "U___Uracil", + "class": "Base", + "classHELM": "RNA", + "alias": "U", + "name": "Ura", + "fullName": "Uracil", + "naturalAnalogShort": "U", + "naturalAnalog": "Ura", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, + 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": "monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "alias": "P", + 
"name": "P", + "fullName": "Phosphate", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.23989999294281007, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.4399000406265259, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + -1.0393999814987183, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.960099995136261, + 0.0, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.3598000109195709, + 1.0393999814987183, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-T___Thymine": { + "type": "monomerTemplate", + "id": "T___Thymine", + "class": "Base", + "classHELM": "RNA", + "alias": "T", + "name": "Thy", + "fullName": "Thymine", + "naturalAnalogShort": "T", + "naturalAnalog": "Thy", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9882000088691711, 
+ 3.688199996948242, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.7116999626159669, + -0.9883999824523926, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 9 + ] + } + ] + }, + "monomerTemplate-G___Guanine": { + "type": "monomerTemplate", + "id": "G___Guanine", + "class": "Base", + "classHELM": "RNA", + "alias": "G", + "name": "Gua", + "fullName": "Guanine", + "naturalAnalogShort": "G", + "naturalAnalog": "Gua", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 11 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 
2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.9527000188827515, + 3.3541998863220217, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + }, + "monomerTemplate-C___Cytosine": { + "type": "monomerTemplate", + "id": "C___Cytosine", + "class": "Base", + "classHELM": "RNA", + "alias": "C", + "name": "Cyt", + "fullName": "Cytosine", + "naturalAnalogShort": "C", + "naturalAnalog": "Cyt", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.8617000579833985, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1117000579833985, + 2.648900032043457, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.3882000148296356, + 2.6489999294281008, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.138200044631958, + 1.350000023841858, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.38830000162124636, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1117000579833985, + 0.05090000107884407, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 3.061800003051758, + 1.3499000072479249, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9883999824523926, + 
-0.9883000254631043, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.3382999897003176, + 1.350000023841858, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + } + ] + }, + "monomerTemplate-A___Adenine": { + "type": "monomerTemplate", + "id": "A___Adenine", + "class": "Base", + "classHELM": "RNA", + "alias": "A", + "name": "Ade", + "fullName": "Adenine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ade", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.0354000329971314, + 0.24979999661445619, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.07919999957084656, + -0.7540000081062317, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5056999921798707, + -0.2906000018119812, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.8177000284194947, + 1.1765999794006348, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.7031000256538391, + 2.1803998947143556, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.7235000133514404, + 1.7170000076293946, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -2.3870999813079836, + -1.5033999681472779, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -1.5053000450134278, + -2.7167999744415285, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -0.0786999985575676, + -2.253200054168701, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.176800012588501, + -0.120899997651577, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -3.587100028991699, + -1.5033999681472779, + 0.0 + 
] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 10 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/monomer_commons.h b/core/indigo-core/molecule/monomer_commons.h index 0ec2ecc6dc..0a8814fe74 100644 --- a/core/indigo-core/molecule/monomer_commons.h +++ b/core/indigo-core/molecule/monomer_commons.h @@ -96,5 +96,22 @@ namespace indigo int leaving_group; std::string id; }; + + const auto kHELMPolymerTypePEPTIDE = kMonomerClassPEPTIDE; /* HELM polymer type names; the first three reuse the monomer class constants */ + const auto kHELMPolymerTypeRNA = kMonomerClassRNA; + const auto kHELMPolymerTypeCHEM = kMonomerClassCHEM; + const auto kHELMPolymerTypeUnknown = "BLOB"; /* name paired with HELMType::Unknown */ + + enum class HELMType /* enumerated counterpart of the polymer type names above */ + { + Peptide, + RNA, + Chem, + Unknown + }; + + HELMType getHELMTypeFromString(const std::string& helm_type); /* polymer type name -> HELMType */ + const std::string& getStringFromHELMType(HELMType helm_type); /* HELMType -> polymer type name */ + } #endif \ No newline at end of file diff --git a/core/indigo-core/molecule/monomers_lib.h b/core/indigo-core/molecule/monomers_lib.h index 8e8367e7cf..b6fd051952 100644 --- a/core/indigo-core/molecule/monomers_lib.h +++ b/core/indigo-core/molecule/monomers_lib.h @@ -161,7 +161,7 @@ namespace indigo {MonomerClass::Terminator, "Terminator"}, {MonomerClass::Linker, "Linker"}, {MonomerClass::Unknown, "Unknown"}, - {MonomerClass::CHEM, "Chem"}, + {MonomerClass::CHEM, "CHEM"}, {MonomerClass::DNA, "DNA"}, {MonomerClass::RNA, "RNA"}, }; @@ -179,7 +179,7 @@ namespace indigo {"Terminator", MonomerClass::Terminator}, {"Linker",
MonomerClass::Linker}, {"Unknown", MonomerClass::Unknown}, - {"Chem", MonomerClass::CHEM}, + {"CHEM", MonomerClass::CHEM}, {"DNA", MonomerClass::DNA}, {"RNA", MonomerClass::RNA}, }; diff --git a/core/indigo-core/molecule/sequence_loader.h b/core/indigo-core/molecule/sequence_loader.h index 64d4e9668c..56faac1198 100644 --- a/core/indigo-core/molecule/sequence_loader.h +++ b/core/indigo-core/molecule/sequence_loader.h @@ -64,6 +64,7 @@ namespace indigo void loadFasta(BaseMolecule& mol, const std::string& seq_type_str); void loadFasta(BaseMolecule& mol, SeqType seq_type); void loadIdt(BaseMolecule& mol); + void loadHELM(BaseMolecule& mol); /* clears mol, then reads HELM notation from the scanner */ private: Vec3f getBackboneMonomerPosition(); @@ -85,6 +86,11 @@ namespace indigo static void check_monomer_place(std::string& idt_alias, IdtModification mon_mod, IdtModification alias_mod, bool has_prev_mon); + using MonomerInfo = std::tuple<std::string, std::string, std::string>; /* (monomer name, repeating, annotation) as built by readHelmMonomer */ + + MonomerInfo readHelmMonomer(); + std::string readHelmSimplePolymerName(std::string& polymer_name); /* fills polymer_name, returns the polymer type */ + Scanner& _scanner; std::unordered_set, pair_hash> _added_templates; const MonomerTemplates& _mon_lib; diff --git a/core/indigo-core/molecule/sequence_saver.h b/core/indigo-core/molecule/sequence_saver.h index c3febe9575..05d5b0d6ad 100644 --- a/core/indigo-core/molecule/sequence_saver.h +++ b/core/indigo-core/molecule/sequence_saver.h @@ -41,7 +41,8 @@ namespace indigo { Sequence, FASTA, - IDT + IDT, + HELM }; static constexpr uint32_t SEQ_LINE_LENGTH = 80; @@ -56,8 +57,11 @@ namespace indigo protected: TGroup& getTGroup(); std::string saveIdt(BaseMolecule& mol, std::deque& sequence); + std::string saveHELM(BaseMolecule& mol, std::vector>& sequence); private: + std::string getMonomerAlias(BaseMolecule& mol, int atom_idx); + std::string getHelmPolymerClass(BaseMolecule& mol, int atom_idx); SequenceSaver(const SequenceSaver&); // no implicit copy Output& _output; const MonomerTemplates& _mon_lib; diff --git a/core/indigo-core/molecule/src/monomer_commons.cpp
b/core/indigo-core/molecule/src/monomer_commons.cpp index efd3af0f49..ad9cb159e3 100644 --- a/core/indigo-core/molecule/src/monomer_commons.cpp +++ b/core/indigo-core/molecule/src/monomer_commons.cpp @@ -269,6 +269,29 @@ namespace indigo } return tg_it == templates_map.end() ? std::nullopt : std::optional>(std::ref(tg_it->second)); } + + HELMType getHELMTypeFromString(const std::string& helm_type) /* looks the name up in a fixed table; at() throws std::out_of_range for a name that is not listed */ + { + static const std::unordered_map<std::string, HELMType> strToType = { + {kHELMPolymerTypePEPTIDE, HELMType::Peptide}, + {kHELMPolymerTypeRNA, HELMType::RNA}, + {kHELMPolymerTypeCHEM, HELMType::Chem}, + {kHELMPolymerTypeUnknown, HELMType::Unknown}, + }; + return strToType.at(helm_type); + } + + const std::string& getStringFromHELMType(HELMType helm_type) /* inverse lookup; the returned reference refers to an entry of the function-local static table */ + { + static const std::unordered_map<HELMType, std::string> typeToStr = { + {HELMType::Peptide, kHELMPolymerTypePEPTIDE}, + {HELMType::RNA, kHELMPolymerTypeRNA}, + {HELMType::Chem, kHELMPolymerTypeCHEM}, + {HELMType::Unknown, kHELMPolymerTypeUnknown}, + }; + return typeToStr.at(helm_type); + } + } #ifdef _MSC_VER diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index b901b5a894..318ac845b9 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -694,3 +694,440 @@ void SequenceLoader::loadIdt(BaseMolecule& mol) if (invalid_symbols.size()) throw Error("Invalid symbols in the sequence: %s", invalid_symbols.c_str()); } + +static std::set<std::string> polymer_types{kHELMPolymerTypePEPTIDE, kHELMPolymerTypeRNA, kHELMPolymerTypeCHEM, kHELMPolymerTypeUnknown}; /* polymer type names accepted by readHelmSimplePolymerName */ +static const char* reserved_helm_chars = "${}|.,-:[]()"; +static const char* unexpected_eod = "Unexpected end of data."; /* was initialised from itself, which left the pointer null; used as the Error text when the scanner reaches EOF inside a monomer */ + +SequenceLoader::MonomerInfo SequenceLoader::readHelmMonomer() /* reads one monomer - a single character or a bracketed name - followed by an optional apostrophe-delimited repeating part and an optional double-quoted annotation; returns the three strings as a tuple */ +{ + std::string monomer_name, repeating, annotation; + auto ch = _scanner.lookNext(); + if (ch == '[') + { + std::string name; + _scanner.skip(1); + bool smiles = false; + for (int bracket_count = 1; bracket_count != 0 &&
!_scanner.isEOF();) + { + ch = _scanner.readChar(); + switch (ch) + { + case '[': + bracket_count++; + smiles = true; + break; + case ']': + bracket_count--; + break; + default: + monomer_name += ch; + break; + } + } + if (_scanner.isEOF()) + throw Error(unexpected_eod); + if (ch != ']') + throw Error("Unexpected char. Expected ']' but found '%c'.", ch); + if (smiles) + throw Error("Inline smiles not supported for now."); + } + else if (ch != -1) + { + _scanner.skip(1); + monomer_name = ch; + } + if (_scanner.isEOF()) + throw Error(unexpected_eod); + if (_scanner.lookNext() == '\'') + { + // repeating + Array name; + _scanner.skip(1); + _scanner.readWord(name, "'"); + if (_scanner.lookNext() != '\'') + throw Error("Unexpected char. Expected ''' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); // skip "'" + repeating = name.ptr(); + } + if (_scanner.lookNext() == '"') // inline annotation + { + Array name; + _scanner.skip(1); + _scanner.readWord(name, "\""); + if (_scanner.lookNext() != '"') + throw Error("Unexpected char. 
Expected '\"' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); // skip '"' + annotation = name.ptr(); + } + // check monomer_name for + return std::make_tuple(monomer_name, repeating, annotation); +} + +std::string SequenceLoader::readHelmSimplePolymerName(std::string& polymer_name) +{ + auto ch = _scanner.lookNext(); + while (std::isalpha(ch) && !_scanner.isEOF()) + { + _scanner.skip(1); + polymer_name += std::toupper(ch); + ch = _scanner.lookNext(); + } + std::string polymer_type = polymer_name; + if (polymer_types.count(polymer_name) == 0) + throw Error("Unknown polymer type '%s'.", polymer_name.c_str()); + while (std::isdigit(ch) && !_scanner.isEOF()) + { + _scanner.skip(1); + polymer_name += ch; + ch = _scanner.lookNext(); + } + return polymer_type; +} + +void SequenceLoader::loadHELM(BaseMolecule& mol) +{ + _row = 0; + mol.clear(); + std::string simple_polymer_name = ""; + std::string simple_polymer_type = ""; + int monomer_idx = 0; + int prev_monomer_template_atom_idx = -1; + using polymer_map = std::map>; + polymer_map used_polymer_nums; + polymer_map::iterator cur_polymer_map; + enum class helm_parts + { + ListOfSimplePolymers, + ListOfConnections, + ListOfPolymerGroups, + ExtendedAnnotation, + End + }; + helm_parts helm_part = helm_parts::ListOfSimplePolymers; + auto& lib = MonomerTemplateLibrary::instance(); + + while (!_scanner.isEOF()) + { + if (helm_part == helm_parts::ListOfSimplePolymers) + { + auto ch = _scanner.lookNext(); + if (simple_polymer_name.size() == 0) // Read simple polymer_name + { + _col = 0; + simple_polymer_type = readHelmSimplePolymerName(simple_polymer_name); + if (used_polymer_nums.count(simple_polymer_name)) + throw Error("Simple polymer '%s' defined more than once.", simple_polymer_name.c_str()); + if (simple_polymer_name == simple_polymer_type) + throw Error("Polymer '%s' without number not allowed.", simple_polymer_name.c_str()); + ch = _scanner.lookNext(); + if (ch != '{') + throw Error("Unexpected symbol. 
Expected '{' but found '%c'.", ch); + _scanner.skip(1); // skip '{' + if (used_polymer_nums.count(simple_polymer_name)) + throw Error("Simple polymer '%s' defined more than once.", simple_polymer_name.c_str()); + auto res = used_polymer_nums.emplace(std::make_pair(simple_polymer_name, std::map())); + if (res.second) + cur_polymer_map = res.first; + else + throw Error("Internal error - cannot emplace polymer map."); + } + else if (ch == '(') + { + throw Error("Unexpected symbol '('. Group not supported for now."); + } + else if (ch != '}') + { + monomer_idx++; + Vec3f pos(_col * MoleculeLayout::DEFAULT_BOND_LENGTH, -MoleculeLayout::DEFAULT_BOND_LENGTH * _row, 0); + _col++; + if (simple_polymer_type == kHELMPolymerTypeUnknown) + { + Array name; + _scanner.readWord(name, reserved_helm_chars); + // skip blob for now + ch = _scanner.lookNext(); + if (ch != '}') + throw Error("Unexpected symbol. Expected '}' but found '%c'.", ch); + } + else if (simple_polymer_type == kHELMPolymerTypeCHEM) + { + auto [id, repeating, annotaion] = readHelmMonomer(); + ch = _scanner.lookNext(); + if (ch != '}') + throw Error("Unexpected symbol. 
Expected '}' but found '%c'.", ch); // only one monomer in chem + if (repeating.size()) + throw Error("Chem cannot be repeated."); + const std::string& monomer_id = lib.getMonomerTemplateIdByAlias(MonomerClass::CHEM, id); + if (monomer_id.size() == 0) // if not found - check for atom mapped SMILES([*:1]) and CXSMILES([*]...[*] |$_R1;;;;_R2;$|) - not now + throw Error("Monomer '%s' not found.", id.c_str()); + checkAddTemplate(mol, lib.getMonomerTemplateById(monomer_id)); + int chem_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(chem_idx, id.c_str()); + mol.asMolecule().setTemplateAtomClass(chem_idx, kMonomerClassCHEM); + mol.asMolecule().setAtomXyz(chem_idx, pos); + cur_polymer_map->second[monomer_idx] = chem_idx; + } + else if (simple_polymer_type == kHELMPolymerTypePEPTIDE) + { + auto [id, repeating, annotaion] = readHelmMonomer(); + const std::string& monomer_id = lib.getMonomerTemplateIdByAlias(MonomerClass::AminoAcid, id); + if (monomer_id.size() == 0) // if not found - check for atom mapped SMILES([*:1]) and CXSMILES([*]...[*] |$_R1;;;;_R2;$|) - not now + throw Error("Monomer '%s' not found.", id.c_str()); + if (repeating.size()) + throw Error("Repeating do not supported now."); + checkAddTemplate(mol, lib.getMonomerTemplateById(monomer_id)); + int amino_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(amino_idx, id.c_str()); + mol.asMolecule().setTemplateAtomClass(amino_idx, kMonomerClassAA); + mol.asMolecule().setTemplateAtomSeqid(amino_idx, monomer_idx); + mol.asMolecule().setAtomXyz(amino_idx, pos); + cur_polymer_map->second[monomer_idx] = amino_idx; + if (monomer_idx > 1) + { + mol.asMolecule().addBond_Silent(amino_idx - 1, amino_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(amino_idx - 1, amino_idx, kRightAttachmentPoint); + mol.setTemplateAtomAttachmentOrder(amino_idx, amino_idx - 1, kLeftAttachmentPoint); + } + ch = _scanner.lookNext(); + if (ch == '.') + _scanner.skip(1); + } + else // 
kHELMPolymerTypeRNA + { + auto [id, repeating, annotaion] = readHelmMonomer(); + const std::string& phosphate_lib_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, id); + if (phosphate_lib_id.size()) + { + if (repeating.size()) + throw Error("Phosphate cannot be repeated."); + // add phosphate + checkAddTemplate(mol, lib.getMonomerTemplateById(phosphate_lib_id)); + int phosphate_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(phosphate_idx, id.c_str()); + mol.asMolecule().setTemplateAtomClass(phosphate_idx, kMonomerClassPHOSPHATE); + mol.asMolecule().setTemplateAtomSeqid(phosphate_idx, monomer_idx); + mol.asMolecule().setAtomXyz(phosphate_idx, pos); + cur_polymer_map->second[monomer_idx] = phosphate_idx; + if (monomer_idx > 1) + { + mol.asMolecule().addBond_Silent(phosphate_idx - 1, phosphate_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(phosphate_idx - 1, phosphate_idx, kRightAttachmentPoint); + mol.setTemplateAtomAttachmentOrder(phosphate_idx, phosphate_idx - 1, kLeftAttachmentPoint); + } + ch = _scanner.lookNext(); + if (ch != '.' && ch != '}') + throw Error("Unexpected symbol. Expected '.' 
or '}' but found '%c'.", ch); + if (ch == '.') + _scanner.skip(1); + continue; + } + const std::string& sugar_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Sugar, id); + if (sugar_id.size() == 0) // if not found - check for atom mapped SMILES([*:1]) and CXSMILES([*]...[*] |$_R1;;;;_R2;$|) - not now + throw Error("Sugar '%s' not found.", id.c_str()); + if (repeating.size()) + throw Error("Sugar cannot be repeated."); + checkAddTemplate(mol, lib.getMonomerTemplateById(sugar_id)); + int sugar_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(sugar_idx, id.c_str()); + mol.asMolecule().setTemplateAtomClass(sugar_idx, kMonomerClassSUGAR); + mol.asMolecule().setTemplateAtomSeqid(sugar_idx, monomer_idx); + mol.asMolecule().setAtomXyz(sugar_idx, pos); + cur_polymer_map->second[monomer_idx] = sugar_idx; + if (monomer_idx > 1) + { + mol.asMolecule().addBond_Silent(sugar_idx - 1, sugar_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(sugar_idx - 1, sugar_idx, kRightAttachmentPoint); + mol.setTemplateAtomAttachmentOrder(sugar_idx, sugar_idx - 1, kLeftAttachmentPoint); + } + ch = _scanner.lookNext(); + if (ch != '(') // In RNA after sugar should be base in () + throw Error("Expected '(' for base but found '%c'.", ch); + _scanner.skip(1); + monomer_idx++; + auto [base_id, base_repeating, base_annotaion] = readHelmMonomer(); + ch = _scanner.lookNext(); + if (ch != ')') // In RNA after sugar should be base in () + throw Error("Expected ')' after base but found '%c'.", ch); + _scanner.skip(1); + if (repeating.size()) + throw Error("Base cannot be repeated."); + const std::string& base_lib_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Base, base_id); + if (base_lib_id.size() == 0) // if not found - check for atom mapped SMILES([*:1]) and CXSMILES([*]...[*] |$_R1;;;;_R2;$|) - not now + throw Error("Base '%s' not found.", base_id.c_str()); + if (base_repeating.size()) + throw Error("Base cannot be repeated."); + checkAddTemplate(mol, 
lib.getMonomerTemplateById(base_lib_id)); + Vec3f base_pos((_col - 1) * MoleculeLayout::DEFAULT_BOND_LENGTH, -MoleculeLayout::DEFAULT_BOND_LENGTH * (_row + 1), 0); + int base_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(base_idx, base_id.c_str()); + mol.asMolecule().setTemplateAtomClass(base_idx, kMonomerClassBASE); + mol.asMolecule().setTemplateAtomSeqid(base_idx, monomer_idx); + mol.asMolecule().setAtomXyz(base_idx, base_pos); + cur_polymer_map->second[monomer_idx] = base_idx; + mol.asMolecule().addBond_Silent(sugar_idx, base_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(sugar_idx, base_idx, kBranchAttachmentPoint); + mol.setTemplateAtomAttachmentOrder(base_idx, sugar_idx, kLeftAttachmentPoint); + ch = _scanner.lookNext(); + if (ch == '.') + { + _scanner.skip(1); + continue; + } + if (ch == '}') + continue; + auto [phosphate_id, phosphate_repeating, phosphate_annotaion] = readHelmMonomer(); + const std::string& phosp_id = lib.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, phosphate_id); + if (phosp_id.size() == 0) + throw Error("Phosphate '%s' not found.", phosphate_id.c_str()); + if (repeating.size()) + throw Error("Phosphate cannot be repeated."); + monomer_idx++; + checkAddTemplate(mol, lib.getMonomerTemplateById(phosp_id)); + Vec3f phosphate_pos(_col * MoleculeLayout::DEFAULT_BOND_LENGTH, -MoleculeLayout::DEFAULT_BOND_LENGTH * _row, 0); + _col++; + int phosphate_idx = mol.asMolecule().addAtom(-1); + mol.asMolecule().setTemplateAtom(phosphate_idx, phosphate_id.c_str()); + mol.asMolecule().setTemplateAtomClass(phosphate_idx, kMonomerClassPHOSPHATE); + mol.asMolecule().setTemplateAtomSeqid(phosphate_idx, monomer_idx); + mol.asMolecule().setAtomXyz(phosphate_idx, phosphate_pos); + cur_polymer_map->second[monomer_idx] = phosphate_idx; + mol.asMolecule().addBond_Silent(sugar_idx, phosphate_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(sugar_idx, phosphate_idx, kRightAttachmentPoint); + 
mol.setTemplateAtomAttachmentOrder(phosphate_idx, sugar_idx, kLeftAttachmentPoint); + ch = _scanner.lookNext(); + if (ch != '.' && ch != '}') + throw Error("Unexpected symbol. Expected '.' or '}' but found '%c'.", ch); + if (ch == '.') + _scanner.skip(1); + } + } + else // end of polymer - } + { + _scanner.skip(1); // skip '}' + ch = _scanner.lookNext(); + if (ch == '"') + { + Array annotation; + _scanner.skip(1); + _scanner.readWord(annotation, "\""); + if (_scanner.lookNext() != '"') + throw Error("Unexpected symbol. Expected '\"' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); + // skip annotation for now + ch = _scanner.lookNext(); + } + _row++; + _col = 0; + monomer_idx = 0; + if (simple_polymer_type == kHELMPolymerTypeRNA) + _row++; // additional row for bases in RNA + if (ch == '|') + { + // cleanup to go to next simple polymer + simple_polymer_name = ""; + simple_polymer_type = ""; + } + else if (ch == '$') + { + helm_part = helm_parts::ListOfConnections; + } + else if (ch == -1) + { + throw Error(unexpected_eod); + } + else + { + throw Error("Unexpected symbol. Expected '|' or '$' but found '%c'.", ch); + } + _scanner.skip(1); + } + } + else if (helm_part == helm_parts::ListOfConnections) + { + auto ch = _scanner.lookNext(); + if (ch == '$') + { + helm_part = helm_parts::ListOfPolymerGroups; + _scanner.skip(1); + continue; + } + // CHEM1,RNA1,32:R1-12:R2"annotation"|..... + std::string left_polymer, right_polymer; + std::ignore = readHelmSimplePolymerName(left_polymer); + ch = _scanner.lookNext(); + if (ch != ',') + throw Error("Unexpected symbol. Expected ',' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); + std::ignore = readHelmSimplePolymerName(right_polymer); + ch = _scanner.lookNext(); + if (ch != ',') + throw Error("Unexpected symbol. 
Expected ',' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); + // read monomer position + int left_monomer_idx, right_monomer_idx; + Array left_ap, right_ap; + Array position; + size_t error_pos; + _scanner.readWord(position, ":"); + left_monomer_idx = std::stoi(position.ptr(), &error_pos); + if (error_pos != position.size() - 1) // arrray contains 0 at the end + throw Error("Only direct connections supported now."); + _scanner.readWord(left_ap, "-"); + _scanner.skip(1); + position.clear(); + _scanner.readWord(position, ":"); + _scanner.skip(1); + right_monomer_idx = std::stoi(position.ptr(), &error_pos); + if (error_pos != position.size() - 1) // arrray contains 0 at the end + throw Error("Only direct connections supported now."); + _scanner.readWord(right_ap, "\"|$"); + int left_templ_atom_idx = used_polymer_nums[left_polymer][left_monomer_idx]; + int right_templ_atom_idx = used_polymer_nums[right_polymer][right_monomer_idx]; + mol.setTemplateAtomAttachmentOrder(left_templ_atom_idx, right_templ_atom_idx, left_ap.ptr()); + mol.setTemplateAtomAttachmentOrder(right_templ_atom_idx, left_templ_atom_idx, right_ap.ptr()); + if (_scanner.isEOF()) + throw Error(unexpected_eod); + ch = _scanner.readChar(); + if (ch == '"') + { + Array annotation; + _scanner.readWord(annotation, "\""); + if (_scanner.isEOF()) + throw Error(unexpected_eod); + if (_scanner.lookNext() != '"') + throw Error("Unexpected char. Expected '\"' but found '%c'.", _scanner.lookNext()); + _scanner.skip(1); // skip '"' + if (_scanner.isEOF()) + throw Error(unexpected_eod); + ch = _scanner.readChar(); + } + if (ch != '|' && ch != '$') + throw Error("Unexpected symbol. 
Expected '|' or '$' but found '%c'.", _scanner.lookNext()); + } + else if (helm_part == helm_parts::ListOfPolymerGroups) + { + Array groups; + _scanner.readWord(groups, "$"); + // skip groups for now + helm_part = helm_parts::ExtendedAnnotation; + } + else // helm_parts::ExtendedAnnotation + { + // read rest of data + std::string rest_of_helm; + _scanner.readAll(rest_of_helm); + auto it = rest_of_helm.find_last_of('$'); + if (it == rest_of_helm.npos) + throw Error("Incorrect format. Last '$' not found."); + std::string signature = rest_of_helm.substr(it + 1); + // split by last '$' and check if right part eq “V2.0” + // if (signature != "v2.0") + // throw Error("Expected HELM V2.0 but got '%s'.", signature.c_str()); + // check that left part is valid json - TODO + helm_part = helm_parts::End; + } + } + if (helm_part != helm_parts::End) + throw Error(unexpected_eod); +} diff --git a/core/indigo-core/molecule/src/sequence_saver.cpp b/core/indigo-core/molecule/src/sequence_saver.cpp index c8c4290faa..cd80e0d9df 100644 --- a/core/indigo-core/molecule/src/sequence_saver.cpp +++ b/core/indigo-core/molecule/src/sequence_saver.cpp @@ -273,6 +273,226 @@ std::string SequenceSaver::saveIdt(BaseMolecule& mol, std::deque& sequence) return seq_string; } +static inline void add_monomer(std::string& helm_string, const std::string& monomer_alias) +{ + if (monomer_alias.size() == 1) + helm_string += monomer_alias; + else + helm_string += '[' + monomer_alias + ']'; +} + +std::string SequenceSaver::getMonomerAlias(BaseMolecule& mol, int atom_idx) +{ + std::string monomer_alias = ""; + std::string monomer_class = mol.getTemplateAtomClass(atom_idx); + std::string monomer = mol.getTemplateAtom(atom_idx); + MonomerTemplateLibrary& lib = MonomerTemplateLibrary::instance(); + const std::string& monomer_id = lib.getMonomerTemplateIdByAlias(MonomerTemplates::getStrToMonomerType().at(monomer_class), monomer); + if (monomer_id.size()) + { + auto& monomer_template = 
MonomerTemplateLibrary::instance().getMonomerTemplateById(monomer_id); + monomer_alias = monomer_template.alias(); + } + return monomer_alias; +} + +std::string SequenceSaver::getHelmPolymerClass(BaseMolecule& mol, int atom_idx) +{ + std::string monomer_class = mol.getTemplateAtomClass(atom_idx); + std::string monomer = mol.getTemplateAtom(atom_idx); + std::string helm_polymer_class = ""; + MonomerTemplateLibrary& lib = MonomerTemplateLibrary::instance(); + const std::string& monomer_id = lib.getMonomerTemplateIdByAlias(MonomerTemplates::getStrToMonomerType().at(monomer_class), monomer); + if (monomer_id.size()) + { + auto& monomer_template = lib.getMonomerTemplateById(monomer_id); + helm_polymer_class = monomer_template.classHELM(); + } + if (helm_polymer_class.size() == 0) + { + if (isNucleicClass(monomer_class)) + helm_polymer_class = kHELMPolymerTypeRNA; + else if (isAminoAcidClass(monomer_class)) + helm_polymer_class = kHELMPolymerTypePEPTIDE; + else + helm_polymer_class = kHELMPolymerTypeCHEM; + } + return helm_polymer_class; +} + +std::string SequenceSaver::saveHELM(BaseMolecule& mol, std::vector>& sequences) +{ + std::string helm_string = ""; + int peptide_idx = 0; + int rna_idx = 0; + int chem_idx = 0; + std::set used_atoms; + using MonomerInfo = std::tuple; + constexpr int polymer_type = 0; + constexpr int polymer_num = 1; + constexpr int monomer_num = 2; + std::map atom_idx_to_monomer_info; + for (auto& sequence : sequences) + { + int monomer_idx = 0; + int polymer_idx = -1; + std::string helm_polymer_class = ""; + HELMType helm_type = HELMType::Unknown; + for (auto atom_idx : sequence) + { + if (used_atoms.count(atom_idx) > 0) // Phosphate can be processed with rest of nucleotide + continue; + std::string monomer = mol.getTemplateAtom(atom_idx); + std::string monomer_alias = getMonomerAlias(mol, atom_idx); + std::string monomer_class = mol.getTemplateAtomClass(atom_idx); + if (monomer_idx == 0) + { + // start new polymer + MonomerTemplateLibrary& lib = 
MonomerTemplateLibrary::instance(); + const std::string& monomer_id = lib.getMonomerTemplateIdByAlias(MonomerTemplates::getStrToMonomerType().at(monomer_class), monomer); + if (monomer_id.size()) + helm_polymer_class = lib.getMonomerTemplateById(monomer_id).classHELM(); + if (helm_string.size()) + helm_string += '|'; // separator between polymers + helm_string += helm_polymer_class; + helm_type = getHELMTypeFromString(helm_polymer_class); + if (helm_polymer_class == kHELMPolymerTypePEPTIDE) + polymer_idx = ++peptide_idx; + else if (helm_polymer_class == kHELMPolymerTypeRNA) + polymer_idx = ++rna_idx; + else if (helm_polymer_class == kHELMPolymerTypeCHEM) + polymer_idx = ++chem_idx; + helm_string += std::to_string(polymer_idx); + helm_string += '{'; + } + if (monomer_alias.size() == 0) + { + if (monomer_class == kMonomerClassBASE) + monomer_alias = monomerAliasByName(monomer_class, monomer); + else if (isAminoAcidClass(monomer_class)) + monomer_alias = monomerAliasByName(kMonomerClassAA, monomer); + else if (isNucleotideClass(monomer_class)) + monomer_alias = monomerAliasByName(kMonomerClassBASE, monomer); + if (monomer_alias.size() == 0) // If alias not foud - use monomer name + monomer_alias = monomer; + } + if (monomer_idx) + helm_string += '.'; // separator between monomers + add_monomer(helm_string, monomer_alias); + monomer_idx++; + atom_idx_to_monomer_info.emplace(std::make_pair(atom_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx))); + + used_atoms.emplace(atom_idx); + + if (monomer_class == kMonomerClassSUGAR) + { + auto& v = mol.getVertex(atom_idx); + std::string phosphate = ""; + int phosphate_idx = -1; + for (auto nei_idx = v.neiBegin(); nei_idx < v.neiEnd(); nei_idx = v.neiNext(nei_idx)) + { + int nei_atom_idx = v.neiVertex(nei_idx); + if (mol.isTemplateAtom(nei_atom_idx)) + { + if (used_atoms.count(nei_atom_idx) > 0) + continue; + std::string mon_class = mol.getTemplateAtomClass(nei_atom_idx); + if (mon_class == kMonomerClassBASE) + { + 
helm_string += '('; // branch monomers in () + add_monomer(helm_string, monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx))); + monomer_idx++; + atom_idx_to_monomer_info.emplace(std::make_pair(nei_atom_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx))); + used_atoms.emplace(nei_atom_idx); + helm_string += ')'; + } + else if (mon_class == kMonomerClassPHOSPHATE) + { + phosphate = monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx)); + phosphate_idx = nei_atom_idx; + } + } + } + if (phosphate.size()) + { + add_monomer(helm_string, phosphate); + monomer_idx++; + atom_idx_to_monomer_info.emplace(std::make_pair(phosphate_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx))); + used_atoms.emplace(phosphate_idx); + } + } + } + if (monomer_idx) + helm_string += '}'; // Finish polymer + } + helm_string += '$'; + // Add connections + int connections_count = 0; + std::vector> directions_map; + mol.getTemplateAtomDirectionsMap(directions_map); + std::set> processed_connections; + for (int atom_idx = 0; atom_idx < mol.vertexCount(); atom_idx++) + { + if (mol.isTemplateAtom(atom_idx)) + { + for (auto& connection : directions_map[atom_idx]) + { + if (processed_connections.count(std::make_pair(atom_idx, connection.second)) == 0) + { + auto [cur_type, cur_pol_num, cur_mon_num] = atom_idx_to_monomer_info.at(atom_idx); + auto [nei_type, nei_pol_num, nei_mon_num] = atom_idx_to_monomer_info.at(connection.second); + if (cur_type != nei_type || cur_pol_num != nei_pol_num) // TODO: add check for connections between same polymer monomers + { + // add connection + if (connections_count) + helm_string += '|'; + helm_string += getStringFromHELMType(cur_type); + helm_string += std::to_string(cur_pol_num); + helm_string += ','; + helm_string += getStringFromHELMType(nei_type); + helm_string += std::to_string(nei_pol_num); + helm_string += ','; + helm_string += std::to_string(cur_mon_num); + helm_string += ":R"; + helm_string += 
std::to_string(connection.first); + helm_string += '-'; + helm_string += std::to_string(cur_mon_num); + helm_string += ':'; + int nei_ap_id = -1; + for (auto& nei_conn : directions_map[connection.second]) + { // TODO: rewrite when connection will contain info about neighb ap_id + if (nei_conn.second == atom_idx) + { + nei_ap_id = nei_conn.first; + break; + } + } + if (nei_ap_id >= 0) + { + helm_string += 'R'; + helm_string += std::to_string(nei_ap_id); + } + else + { + helm_string += '?'; + } + } + processed_connections.emplace(std::make_pair(atom_idx, connection.second)); + processed_connections.emplace(std::make_pair(connection.second, atom_idx)); + } + } + } + } + helm_string += '$'; + // Add polymer groups + helm_string += '$'; + // Add ExtendedAnnotation + helm_string += '$'; + // Add helm version + helm_string += "V2.0"; + return helm_string; +} + static void check_backbone_connection(BaseMolecule& mol, std::vector> directions_map, int template_idx, int side, std::map& side_backbone_links, std::map& other_side_backbone_links) { @@ -305,174 +525,181 @@ void SequenceSaver::saveMolecule(BaseMolecule& mol, SeqFormat sf) sl.sequenceExtract(sequences); auto prop_it = mol_properties.begin(); int seq_idx = 0; - if (sf == SeqFormat::IDT) + if (sf == SeqFormat::HELM) { - std::vector> directions_map; - mol.getTemplateAtomDirectionsMap(directions_map); - std::map left_backbone_links; - std::map right_backbone_links; - std::map seq_start; - std::map seq_end; - for (size_t idx = 0; idx < sequences.size(); idx++) - { - auto& sequence = sequences[idx]; - auto template_idx = sequence.front(); - seq_start[template_idx] = idx; - seq_end[sequence.back()] = idx; - if (sequence.size() != 1) // CHEM sequence always only one monomer - continue; - if (strcasecmp(mol.getTemplateAtomClass(template_idx), kMonomerClassCHEM)) - continue; - check_backbone_connection(mol, directions_map, template_idx, kLeftAttachmentPointIdx, left_backbone_links, right_backbone_links); - 
check_backbone_connection(mol, directions_map, template_idx, kRightAttachmentPointIdx, right_backbone_links, left_backbone_links); - } - if (left_backbone_links.size()) + seq_text = saveHELM(mol, sequences); + } + else + { + if (sf == SeqFormat::IDT) { - std::vector> joined_sequences; - while (left_backbone_links.size()) + std::vector> directions_map; + mol.getTemplateAtomDirectionsMap(directions_map); + std::map left_backbone_links; + std::map right_backbone_links; + std::map seq_start; + std::map seq_end; + for (size_t idx = 0; idx < sequences.size(); idx++) { - auto left_atom_idx = left_backbone_links.begin()->second; - // find leftmost sequence and copy to joined sequences - for (auto left = left_backbone_links.find(left_atom_idx); left != left_backbone_links.end(); left = left_backbone_links.find(left_atom_idx)) - { - left_atom_idx = left->second; - } - joined_sequences.push_back({}); - for (auto idx : sequences[seq_end[left_atom_idx]]) - { - joined_sequences.back().emplace_back(idx); - } - // while have sequence at right - connect it - for (auto right = right_backbone_links.find(left_atom_idx); right != right_backbone_links.end();) + auto& sequence = sequences[idx]; + auto template_idx = sequence.front(); + seq_start[template_idx] = idx; + seq_end[sequence.back()] = idx; + if (sequence.size() != 1) // CHEM sequence always only one monomer + continue; + if (strcasecmp(mol.getTemplateAtomClass(template_idx), kMonomerClassCHEM)) + continue; + check_backbone_connection(mol, directions_map, template_idx, kLeftAttachmentPointIdx, left_backbone_links, right_backbone_links); + check_backbone_connection(mol, directions_map, template_idx, kRightAttachmentPointIdx, right_backbone_links, left_backbone_links); + } + if (left_backbone_links.size()) + { + std::vector> joined_sequences; + while (left_backbone_links.size()) { - auto right_atom_idx = right->second; - left_backbone_links.erase(right_atom_idx); - int right_idx; - for (auto idx : 
sequences[seq_start[right_atom_idx]]) + auto left_atom_idx = left_backbone_links.begin()->second; + // find leftmost sequence and copy to joined sequences + for (auto left = left_backbone_links.find(left_atom_idx); left != left_backbone_links.end(); left = left_backbone_links.find(left_atom_idx)) + { + left_atom_idx = left->second; + } + joined_sequences.push_back({}); + for (auto idx : sequences[seq_end[left_atom_idx]]) { joined_sequences.back().emplace_back(idx); - right_idx = idx; } - right = right_backbone_links.find(right_idx); + // while have sequence at right - connect it + for (auto right = right_backbone_links.find(left_atom_idx); right != right_backbone_links.end();) + { + auto right_atom_idx = right->second; + left_backbone_links.erase(right_atom_idx); + int right_idx; + for (auto idx : sequences[seq_start[right_atom_idx]]) + { + joined_sequences.back().emplace_back(idx); + right_idx = idx; + } + right = right_backbone_links.find(right_idx); + } } + sequences = joined_sequences; } - sequences = joined_sequences; } - } - for (auto& sequence : sequences) - { - std::string seq_string; - if (sf == SeqFormat::IDT) + for (auto& sequence : sequences) { - seq_string.append(saveIdt(mol, sequence)); - } - else - { - for (auto atom_idx : sequence) + std::string seq_string; + if (sf == SeqFormat::IDT) + { + seq_string.append(saveIdt(mol, sequence)); + } + else { - if (mol.isTemplateAtom(atom_idx)) + for (auto atom_idx : sequence) { - std::string mon_class = mol.getTemplateAtomClass(atom_idx); - if (isBackboneClass(mon_class)) + if (mol.isTemplateAtom(atom_idx)) { - std::string label; - if (mon_class == kMonomerClassSUGAR) + std::string mon_class = mol.getTemplateAtomClass(atom_idx); + if (isBackboneClass(mon_class)) { - auto& v = mol.getVertex(atom_idx); - for (auto nei_idx = v.neiBegin(); nei_idx < v.neiEnd(); nei_idx = v.neiNext(nei_idx)) + std::string label; + if (mon_class == kMonomerClassSUGAR) { - int nei_atom_idx = v.neiVertex(nei_idx); - if 
(mol.isTemplateAtom(nei_atom_idx) && std::string(mol.getTemplateAtomClass(nei_atom_idx)) == kMonomerClassBASE) + auto& v = mol.getVertex(atom_idx); + for (auto nei_idx = v.neiBegin(); nei_idx < v.neiEnd(); nei_idx = v.neiNext(nei_idx)) { - mon_class = kMonomerClassBASE; - atom_idx = nei_atom_idx; - label = monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx)); - break; + int nei_atom_idx = v.neiVertex(nei_idx); + if (mol.isTemplateAtom(nei_atom_idx) && std::string(mol.getTemplateAtomClass(nei_atom_idx)) == kMonomerClassBASE) + { + mon_class = kMonomerClassBASE; + atom_idx = nei_atom_idx; + label = monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx)); + break; + } } } - } - else if (isAminoAcidClass(mon_class)) - { - mon_class = kMonomerClassAA; - label = monomerAliasByName(kMonomerClassAA, mol.getTemplateAtom(atom_idx)); - } - else if (isNucleotideClass(mon_class)) - { - mon_class = kMonomerClassBASE; // treat nucleotide symbol as a base - label = monomerAliasByName(kMonomerClassBASE, mol.getTemplateAtom(atom_idx)); - } + else if (isAminoAcidClass(mon_class)) + { + mon_class = kMonomerClassAA; + label = monomerAliasByName(kMonomerClassAA, mol.getTemplateAtom(atom_idx)); + } + else if (isNucleotideClass(mon_class)) + { + mon_class = kMonomerClassBASE; // treat nucleotide symbol as a base + label = monomerAliasByName(kMonomerClassBASE, mol.getTemplateAtom(atom_idx)); + } - if (label.size()) - { - TGroup temp; - if (!_mon_lib.getMonomerTemplate(mon_class, label, temp)) + if (label.size()) { - // if symbol is not standard, check its natural analog - const char* natrep = nullptr; - int temp_idx = mol.getTemplateAtomTemplateIndex(atom_idx); - if (temp_idx > -1) - { - auto& tg = mol.tgroups.getTGroup(temp_idx); - natrep = tg.tgroup_natreplace.ptr(); - } - else + TGroup temp; + if (!_mon_lib.getMonomerTemplate(mon_class, label, temp)) { - auto tg_ref = findTemplateInMap(label, mon_class, _templates); - if (tg_ref.has_value()) + // if symbol is not 
standard, check its natural analog + const char* natrep = nullptr; + int temp_idx = mol.getTemplateAtomTemplateIndex(atom_idx); + if (temp_idx > -1) { - auto& tg = tg_ref.value().get(); + auto& tg = mol.tgroups.getTGroup(temp_idx); natrep = tg.tgroup_natreplace.ptr(); } + else + { + auto tg_ref = findTemplateInMap(label, mon_class, _templates); + if (tg_ref.has_value()) + { + auto& tg = tg_ref.value().get(); + natrep = tg.tgroup_natreplace.ptr(); + } + } + std::string natural_analog; + if (natrep) + natural_analog = monomerAliasByName(mon_class, extractMonomerName(natrep)); + + if (_mon_lib.getMonomerTemplate(mon_class, natural_analog, temp)) + label = natural_analog; + else if (mon_class == kMonomerClassBASE) + label = "N"; + else if (mon_class == kMonomerClassAA) + label = "X"; } - std::string natural_analog; - if (natrep) - natural_analog = monomerAliasByName(mon_class, extractMonomerName(natrep)); - if (_mon_lib.getMonomerTemplate(mon_class, natural_analog, temp)) - label = natural_analog; - else if (mon_class == kMonomerClassBASE) - label = "N"; - else if (mon_class == kMonomerClassAA) - label = "X"; + if (label.size() > 1) + throw Error("Can't save '%s' to sequence format", label.c_str()); + seq_string += label; } - - if (label.size() > 1) - throw Error("Can't save '%s' to sequence format", label.c_str()); - seq_string += label; } } } } - } - if (seq_string.size()) - { - // sequences separators are different for FASTA, IDT and Sequence - if (sf == SeqFormat::FASTA) + if (seq_string.size()) { - if (seq_idx) - seq_text += "\n"; - std::string fasta_header = ">Sequence"; - fasta_header += std::to_string(seq_idx + 1); - if (prop_it != mol_properties.end()) + // sequences separators are different for FASTA, IDT and Sequence + if (sf == SeqFormat::FASTA) { - auto& props = mol_properties.value(prop_it); - prop_it++; - if (props.contains(kFASTA_HEADER)) - fasta_header = props.at(kFASTA_HEADER); + if (seq_idx) + seq_text += "\n"; + std::string fasta_header = 
">Sequence"; + fasta_header += std::to_string(seq_idx + 1); + if (prop_it != mol_properties.end()) + { + auto& props = mol_properties.value(prop_it); + prop_it++; + if (props.contains(kFASTA_HEADER)) + fasta_header = props.at(kFASTA_HEADER); + } + fasta_header += "\n"; + seq_text += fasta_header; } - fasta_header += "\n"; - seq_text += fasta_header; - } - else if (seq_text.size()) - seq_text += sf == SeqFormat::Sequence ? " " : "\n"; + else if (seq_text.size()) + seq_text += sf == SeqFormat::Sequence ? " " : "\n"; - seq_text += seq_string.substr(0, SEQ_LINE_LENGTH); - for (size_t format_ind = SEQ_LINE_LENGTH; format_ind < seq_string.size(); format_ind += SEQ_LINE_LENGTH) - { - seq_text += "\n"; - seq_text += seq_string.substr(format_ind, SEQ_LINE_LENGTH); + seq_text += seq_string.substr(0, SEQ_LINE_LENGTH); + for (size_t format_ind = SEQ_LINE_LENGTH; format_ind < seq_string.size(); format_ind += SEQ_LINE_LENGTH) + { + seq_text += "\n"; + seq_text += seq_string.substr(format_ind, SEQ_LINE_LENGTH); + } + seq_idx++; } - seq_idx++; } } if (seq_text.size()) From 9e0dcf21cfd516c208cecddfc0174d7cb41cc63f Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Fri, 28 Jun 2024 21:07:35 +0300 Subject: [PATCH 2/2] Fix typo --- .../ref/formats/helm_to_ket.py.out | 3 ++ .../ref/formats/ket_to_helm.py.out | 3 ++ .../integration/tests/formats/helm_to_ket.py | 6 +-- .../integration/tests/formats/ket_to_helm.py | 2 +- .../tests/formats/ref/helm_chem_peptide.ket | 48 +++++++++---------- .../molecule/src/sequence_loader.cpp | 7 ++- .../molecule/src/sequence_saver.cpp | 4 +- 7 files changed, 41 insertions(+), 32 deletions(-) diff --git a/api/tests/integration/ref/formats/helm_to_ket.py.out b/api/tests/integration/ref/formats/helm_to_ket.py.out index a449526b5b..eb73851d61 100644 --- a/api/tests/integration/ref/formats/helm_to_ket.py.out +++ b/api/tests/integration/ref/formats/helm_to_ket.py.out @@ -1,4 +1,7 @@ *** HELM to KET 
*** +helm_annotations.ket:SUCCEED +helm_chem_peptide.ket:SUCCEED helm_multi_char_rna.ket:SUCCEED +helm_peptide.ket:SUCCEED helm_simple_rna.ket:SUCCEED Test 'PEPTIDE1{A'2'}$$$$V2.0': got expected error 'Repeating do not supported now.' diff --git a/api/tests/integration/ref/formats/ket_to_helm.py.out b/api/tests/integration/ref/formats/ket_to_helm.py.out index f7f24ed0b7..b910c765a7 100644 --- a/api/tests/integration/ref/formats/ket_to_helm.py.out +++ b/api/tests/integration/ref/formats/ket_to_helm.py.out @@ -1,3 +1,6 @@ *** KET to HELM *** +helm_annotations.ket:SUCCEED +helm_chem_peptide.ket:SUCCEED helm_multi_char_rna.ket:SUCCEED +helm_peptide.ket:SUCCEED helm_simple_rna.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/helm_to_ket.py b/api/tests/integration/tests/formats/helm_to_ket.py index a82368519f..e5812b413d 100644 --- a/api/tests/integration/tests/formats/helm_to_ket.py +++ b/api/tests/integration/tests/formats/helm_to_ket.py @@ -32,7 +32,7 @@ def find_diff(a, b): "helm_simple_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$V2.0", "helm_multi_char_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R([daA])}$$$$V2.0", "helm_peptide": "PEPTIDE1{A.[meA].C}$$$$V2.0", - "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R0$$$V2.0", + "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R1$$$V2.0", "helm_annotations": 'BLOB1{BEAD}"Animated Polystyrene"|CHEM1{[hxy]"Annotation"}|RNA1{R(A"mutation")P.R(U)P.R(G)P}$$$$V2.0', } @@ -42,8 +42,8 @@ def find_diff(a, b): for filename in sorted(helm_data.keys()): mol = indigo.loadHelm(helm_data[filename]) - with open(os.path.join(ref_path, filename) + ".ket", "w") as file: - file.write(mol.json()) + # with open(os.path.join(ref_path, filename) + ".ket", "w") as file: + # file.write(mol.json()) with open(os.path.join(ref_path, filename) + ".ket", "r") as file: ket_ref = file.read() ket = mol.json() diff --git 
a/api/tests/integration/tests/formats/ket_to_helm.py b/api/tests/integration/tests/formats/ket_to_helm.py index 673ceab390..94b08e52f1 100644 --- a/api/tests/integration/tests/formats/ket_to_helm.py +++ b/api/tests/integration/tests/formats/ket_to_helm.py @@ -34,7 +34,7 @@ def find_diff(a, b): "helm_simple_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$V2.0", "helm_multi_char_rna": "RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R([daA])}$$$$V2.0", "helm_peptide": "PEPTIDE1{A.[meA].C}$$$$V2.0", - "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R0$$$V2.0", + "helm_chem_peptide": "CHEM1{[PEG2]}|PEPTIDE1{W.N.D.[Pen].G.[Orn].D.A.D.G.S.G.[Cap]}$CHEM1,PEPTIDE1,1:R1-1:R1$$$V2.0", "helm_annotations": "CHEM1{[hxy]}|RNA1{R(A)P.R(U)P.R(G)P}$$$$V2.0", } diff --git a/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket b/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket index 24b9933141..69113b335b 100644 --- a/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket +++ b/api/tests/integration/tests/formats/ref/helm_chem_peptide.ket @@ -45,17 +45,6 @@ } ], "connections": [ - { - "connectionType": "single", - "endpoint1": { - "monomerId": "monomer0", - "attachmentPointId": "R2" - }, - "endpoint2": { - "monomerId": "monomer1", - "attachmentPointId": "R1" - } - }, { "connectionType": "single", "endpoint1": { @@ -187,6 +176,17 @@ "monomerId": "monomer13", "attachmentPointId": "R1" } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R1" + }, + "endpoint2": { + "monomerId": "monomer1", + "attachmentPointId": "R1" + } } ], "templates": [ @@ -235,7 +235,7 @@ "monomer1": { "type": "monomer", "id": "1", - "seqid": 2, + "seqid": 1, "position": { "x": 0.0, "y": -1.600000023841858 @@ -246,7 +246,7 @@ "monomer2": { "type": "monomer", "id": "2", - "seqid": 3, + "seqid": 2, "position": { "x": 1.600000023841858, "y": -1.600000023841858 @@ -257,7 +257,7 @@ "monomer3": { 
"type": "monomer", "id": "3", - "seqid": 4, + "seqid": 3, "position": { "x": 3.200000047683716, "y": -1.600000023841858 @@ -268,7 +268,7 @@ "monomer4": { "type": "monomer", "id": "4", - "seqid": 5, + "seqid": 4, "position": { "x": 4.800000190734863, "y": -1.600000023841858 @@ -279,7 +279,7 @@ "monomer5": { "type": "monomer", "id": "5", - "seqid": 6, + "seqid": 5, "position": { "x": 6.400000095367432, "y": -1.600000023841858 @@ -290,7 +290,7 @@ "monomer6": { "type": "monomer", "id": "6", - "seqid": 7, + "seqid": 6, "position": { "x": 8.0, "y": -1.600000023841858 @@ -301,7 +301,7 @@ "monomer7": { "type": "monomer", "id": "7", - "seqid": 8, + "seqid": 7, "position": { "x": 9.600000381469727, "y": -1.600000023841858 @@ -312,7 +312,7 @@ "monomer8": { "type": "monomer", "id": "8", - "seqid": 9, + "seqid": 8, "position": { "x": 11.199999809265137, "y": -1.600000023841858 @@ -323,7 +323,7 @@ "monomer9": { "type": "monomer", "id": "9", - "seqid": 10, + "seqid": 9, "position": { "x": 12.800000190734864, "y": -1.600000023841858 @@ -334,7 +334,7 @@ "monomer10": { "type": "monomer", "id": "10", - "seqid": 11, + "seqid": 10, "position": { "x": 14.40000057220459, "y": -1.600000023841858 @@ -345,7 +345,7 @@ "monomer11": { "type": "monomer", "id": "11", - "seqid": 12, + "seqid": 11, "position": { "x": 16.0, "y": -1.600000023841858 @@ -356,7 +356,7 @@ "monomer12": { "type": "monomer", "id": "12", - "seqid": 13, + "seqid": 12, "position": { "x": 17.600000381469728, "y": -1.600000023841858 @@ -367,7 +367,7 @@ "monomer13": { "type": "monomer", "id": "13", - "seqid": 14, + "seqid": 13, "position": { "x": 19.200000762939454, "y": -1.600000023841858 diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index 318ac845b9..6ce9a338bb 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -24,6 +24,7 @@ #include "base_cpp/scanner.h" #include 
"layout/molecule_layout.h" #include "layout/sequence_layout.h" +#include "molecule/ket_commons.h" #include "molecule/molecule.h" #include "molecule/monomer_commons.h" #include "molecule/sequence_loader.h" @@ -1070,6 +1071,7 @@ void SequenceLoader::loadHELM(BaseMolecule& mol) Array position; size_t error_pos; _scanner.readWord(position, ":"); + _scanner.skip(1); left_monomer_idx = std::stoi(position.ptr(), &error_pos); if (error_pos != position.size() - 1) // arrray contains 0 at the end throw Error("Only direct connections supported now."); @@ -1084,8 +1086,9 @@ void SequenceLoader::loadHELM(BaseMolecule& mol) _scanner.readWord(right_ap, "\"|$"); int left_templ_atom_idx = used_polymer_nums[left_polymer][left_monomer_idx]; int right_templ_atom_idx = used_polymer_nums[right_polymer][right_monomer_idx]; - mol.setTemplateAtomAttachmentOrder(left_templ_atom_idx, right_templ_atom_idx, left_ap.ptr()); - mol.setTemplateAtomAttachmentOrder(right_templ_atom_idx, left_templ_atom_idx, right_ap.ptr()); + mol.asMolecule().addBond_Silent(left_templ_atom_idx, right_templ_atom_idx, BOND_SINGLE); + mol.setTemplateAtomAttachmentOrder(left_templ_atom_idx, right_templ_atom_idx, convertAPFromHELM(left_ap.ptr()).c_str()); + mol.setTemplateAtomAttachmentOrder(right_templ_atom_idx, left_templ_atom_idx, convertAPFromHELM(right_ap.ptr()).c_str()); if (_scanner.isEOF()) throw Error(unexpected_eod); ch = _scanner.readChar(); diff --git a/core/indigo-core/molecule/src/sequence_saver.cpp b/core/indigo-core/molecule/src/sequence_saver.cpp index cd80e0d9df..0e33d341f0 100644 --- a/core/indigo-core/molecule/src/sequence_saver.cpp +++ b/core/indigo-core/molecule/src/sequence_saver.cpp @@ -454,7 +454,7 @@ std::string SequenceSaver::saveHELM(BaseMolecule& mol, std::vector= 0) { helm_string += 'R'; - helm_string += std::to_string(nei_ap_id); + helm_string += std::to_string(nei_ap_id + 1); } else {