From fe00419bba32bb8a176159d8edeb915f50c7cc22 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 2 Apr 2024 18:11:41 +0300 Subject: [PATCH] Backmerge: #1881 Macro: Cannot load Peptides from our Library that are not connected by bonds using FASTA file (#1897) --- .../ref/formats/fasta_to_ket.py.out | 1 + .../integration/tests/formats/fasta_to_ket.py | 1 + .../tests/formats/molecules/test_1881.fasta | 8 + .../tests/formats/ref/test_1881.ket | 579 ++++++++++++++++++ .../molecule/src/sequence_loader.cpp | 3 +- 5 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 api/tests/integration/tests/formats/molecules/test_1881.fasta create mode 100644 api/tests/integration/tests/formats/ref/test_1881.ket diff --git a/api/tests/integration/ref/formats/fasta_to_ket.py.out b/api/tests/integration/ref/formats/fasta_to_ket.py.out index a3c9143fef..1d611c3a25 100644 --- a/api/tests/integration/ref/formats/fasta_to_ket.py.out +++ b/api/tests/integration/ref/formats/fasta_to_ket.py.out @@ -5,3 +5,4 @@ test_dna.ket:SUCCEED multiseq.ket:SUCCEED break.ket:SUCCEED comment.ket:SUCCEED +test_1881.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/fasta_to_ket.py b/api/tests/integration/tests/formats/fasta_to_ket.py index f1cd563a23..54901827d8 100644 --- a/api/tests/integration/tests/formats/fasta_to_ket.py +++ b/api/tests/integration/tests/formats/fasta_to_ket.py @@ -30,6 +30,7 @@ def find_diff(a, b): {"file": "multiseq", "seq_type": "DNA"}, {"file": "break", "seq_type": "PEPTIDE"}, {"file": "comment", "seq_type": "PEPTIDE"}, + {"file": "test_1881", "seq_type": "PEPTIDE"}, ] for desc in fasta_files: diff --git a/api/tests/integration/tests/formats/molecules/test_1881.fasta b/api/tests/integration/tests/formats/molecules/test_1881.fasta new file mode 100644 index 0000000000..8a205a5b1c --- /dev/null +++ b/api/tests/integration/tests/formats/molecules/test_1881.fasta @@ -0,0 +1,8 @@ +>Sequence1 +A +>Sequence2 +A +>Sequence3 +D +>Sequence4 +C diff --git a/api/tests/integration/tests/formats/ref/test_1881.ket b/api/tests/integration/tests/formats/ref/test_1881.ket new file mode 100644 index 0000000000..d920dfc642 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/test_1881.ket @@ -0,0 +1,579 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "monomer1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + } + ], + "connections": [], + "templates": [ + { + "$ref": "monomerTemplate-Ala" + }, + { + "$ref": "monomerTemplate-Asp" + }, + { + "$ref": "monomerTemplate-Cys" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 1, + "position": { + "x": 0.0, + "y": -0.0 + }, + "alias": "Ala", + "templateId": "Ala" + }, + "monomer1": { + "type": "monomer", + "id": "1", + "seqid": 1, + "position": { + "x": 0.0, + "y": -1.600000023841858 + }, + "alias": "Ala", + "templateId": "Ala" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 1, + "position": { + "x": 0.0, + "y": -3.200000047683716 + }, + "alias": "Asp", + "templateId": "Asp" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 1, + "position": { + "x": 0.0, + "y": -4.800000190734863 + }, + "alias": "Cys", + "templateId": "Cys" + }, + "monomerTemplate-Ala": { + "type": "monomerTemplate", + "id": "Ala", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "A", + "name": "Ala", + "fullName": "Alanine", + "naturalAnalogShort": "A", + "naturalAnalog": "Ala", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + } + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 5 + ] + } + } + ], + "atoms": [ + { + "label": "N", + "location": [ + -0.9805331230163574, + -0.3062945008277893, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.21253088116645814, + 0.2057330161333084, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.24245710670948029, + 1.3590255975723267, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.8222288489341736, + -0.3062945008277893, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.846138596534729, + -1.2284597158432007, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.5903092622756959, + 0.2057330161333084, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.823233723640442, + 0.07071340084075928, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + } + ] + }, + "monomerTemplate-Asp": { + "type": "monomerTemplate", + "id": "Asp", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "D", + "name": "Asp", + "fullName": "Aspartic acid", + "naturalAnalogShort": "D", + "naturalAnalog": "Asp", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 10 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.63100004196167, + -1.557800054550171, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.632699966430664, + -2.7392001152038576, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.3506999909877777, + -0.8201000094413757, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -0.9294999837875366, + -1.557800054550171, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.9524999856948853, + -0.9668999910354614, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.34850001335144045, + 0.6575000286102295, + 0.0 + ] + }, + { + "label": "C", + "location": [ + -0.9316999912261963, + 1.3952000141143799, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -1.954200029373169, + 0.8032000064849854, + 0.0 + ] + }, + { + "label": "O", + "location": [ + -0.9334999918937683, + 2.5766000747680666, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.65339994430542, + -0.9657999873161316, + 0.0 + ] + }, + { + "label": "H", + "location": [ + 0.08510000258684159, + 3.175100088119507, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 10 + ] + } + ] + }, + "monomerTemplate-Cys": { + "type": "monomerTemplate", + "id": "Cys", + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "alias": "C", + "name": "Cys", + "fullName": "Cysteine", + "naturalAnalogShort": "C", + "naturalAnalog": "Cys", + "attachmentPoints": [ + { + "attachmentAtom": 4, + "leavingGroup": { + "atoms": [ + 7 + ] + } + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + } + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.4457000494003297, + -1.1332999467849732, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.1453000009059906, + -0.3840000033378601, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.14300000667572022, + 1.1167999505996705, + 0.0 + ] + }, + { + "label": "S", + "location": [ + -1.1572999954223633, + 1.8660999536514283, + 0.0 + ] + }, + { + "label": "N", + "location": [ + -1.1550999879837037, + -1.1332999467849732, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.4474999904632569, + -2.3333001136779787, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.4842000007629396, + -0.5320000052452087, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -2.194200038909912, + -0.5331000089645386, + 0.0 + ] + }, + { + "label": "H", + "location": [ + -1.15910005569458, + 3.0660998821258547, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 5, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index 0879fdda3b..937fc29e6c 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -89,7 +89,8 @@ void SequenceLoader::loadFasta(BaseMolecule& mol, SeqType seq_type) _row++; } properties.insert(kFASTA_HEADER, fasta_str); - frag_idx++; + if (mol.vertexCount() > 0) // do not increment fragment id if first fragment + frag_idx++; continue; break; default: