From e94605bd69cc10afdff72d71b9abb9670b1a9e39 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Fri, 29 Sep 2023 17:46:58 +0300 Subject: [PATCH] #1287 New S-Group type Query component level grouping (#1288) --- .../formats/ket_with_query_components.py.out | 2 + .../formats/ket_with_query_components.py | 35 ++++ .../formats/ref/ket_with_query_components.ket | 198 ++++++++++++++++++ core/indigo-core/molecule/ket_commons.h | 6 + .../molecule/molecule_json_loader.h | 1 + .../molecule/src/molecule_json_loader.cpp | 27 ++- .../molecule/src/molecule_json_saver.cpp | 33 ++- .../molecule/src/query_molecule.cpp | 1 - 8 files changed, 298 insertions(+), 5 deletions(-) create mode 100644 api/tests/integration/ref/formats/ket_with_query_components.py.out create mode 100644 api/tests/integration/tests/formats/ket_with_query_components.py create mode 100644 api/tests/integration/tests/formats/ref/ket_with_query_components.ket diff --git a/api/tests/integration/ref/formats/ket_with_query_components.py.out b/api/tests/integration/ref/formats/ket_with_query_components.py.out new file mode 100644 index 0000000000..78b56b42bc --- /dev/null +++ b/api/tests/integration/ref/formats/ket_with_query_components.py.out @@ -0,0 +1,2 @@ +*** KET with query components *** +ket_with_query_components.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/ket_with_query_components.py b/api/tests/integration/tests/formats/ket_with_query_components.py new file mode 100644 index 0000000000..a6ad072a39 --- /dev/null +++ b/api/tests/integration/tests/formats/ket_with_query_components.py @@ -0,0 +1,35 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import * # noqa + +indigo = Indigo() +indigo.setOption("json-saving-pretty", True) + +print("*** KET with query components ***") + +ref_path = joinPathPy("ref/", __file__) +name = "ket_with_query_components.ket" +filename = os.path.join(ref_path, name) + +mol = indigo.loadQueryMoleculeFromFile(filename) +with open(filename, "r") as file: + ket_ref = file.read() +ket = mol.json() +diff = find_diff(ket_ref, ket) +if not diff: + print(name + ":SUCCEED") +else: + print(name + ":FAILED") + print(diff) diff --git a/api/tests/integration/tests/formats/ref/ket_with_query_components.ket b/api/tests/integration/tests/formats/ref/ket_with_query_components.ket new file mode 100644 index 0000000000..2f3eddf7c2 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/ket_with_query_components.ket @@ -0,0 +1,198 @@ +{ + "root": { + "nodes": [ + { + "$ref": "mol0" + }, + { + "$ref": "mol1" + }, + { + "$ref": "mol2" + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 3.2669873237609865, + -2.674999952316284, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.133012771606445, + -2.174999952316284, + 0.0 + ], + "queryProperties": { + "connectivity": 2, + "ringSize": 4 + } + }, + { + "label": "C", + "location": [ + 5.016987323760986, + -2.299999952316284, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 5.883012771606445, + -1.7999999523162842, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + } + ] + }, + "mol1": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 7.550000190734863, + -3.674999952316284, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 8.050000190734864, + -2.808974504470825, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 9.324999809265137, + -3.299999952316284, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 8.61789321899414, + -4.007106781005859, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + } + ], + "sgroups": [ + { + "type": "queryComponent", + "atoms": [ + 0, + 1, + 2, + 3 + ] + } + ] + }, + "mol2": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 3.0999999046325685, + -6.375, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.8071067333221437, + -7.082106590270996, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.9410812854766847, + -7.582106590270996, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + } + ], + "sgroups": [ + { + "type": "queryComponent", + "atoms": [ + 0, + 1, + 2 + ] + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/ket_commons.h b/core/indigo-core/molecule/ket_commons.h index 31e789dd4d..3cfb2935de 100644 --- a/core/indigo-core/molecule/ket_commons.h +++ b/core/indigo-core/molecule/ket_commons.h @@ -27,6 +27,7 @@ #include "common/math/algebra.h" #include "graph/graph.h" #include "molecule/molecule_cip_calculator.h" +#include "molecule/query_molecule.h" #include "reaction/base_reaction.h" namespace indigo @@ -111,6 +112,11 @@ namespace indigo for (auto atom_idx : sgroup.atoms) sg_set.insert(atom_idx); } + if (mol.isQueryMolecule()) + { + QueryMolecule& qmol = static_cast(mol); + qmol.getComponentNeighbors(neighbors); + } } class KETSimpleObject : public MetaObject diff --git a/core/indigo-core/molecule/molecule_json_loader.h b/core/indigo-core/molecule/molecule_json_loader.h index d5bc529281..0f1d143598 100644 --- a/core/indigo-core/molecule/molecule_json_loader.h +++ b/core/indigo-core/molecule/molecule_json_loader.h @@ -110,6 +110,7 @@ namespace indigo Molecule* _pmol; QueryMolecule* _pqmol; std::vector _stereo_centers; + unsigned int components_count; }; } // namespace indigo diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp index f1d5aa084b..7c7c10a523 100644 --- a/core/indigo-core/molecule/src/molecule_json_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp @@ -20,7 +20,8 @@ using namespace std; IMPL_ERROR(MoleculeJsonLoader, "molecule json loader"); MoleculeJsonLoader::MoleculeJsonLoader(Document& ket) - : _mol_array(kArrayType), _mol_nodes(_mol_array), _meta_objects(kArrayType), _pmol(0), _pqmol(0), ignore_noncritical_query_features(false) + : _mol_array(kArrayType), _mol_nodes(_mol_array), _meta_objects(kArrayType), _pmol(0), _pqmol(0), ignore_noncritical_query_features(false), + components_count(0) { Value& root = ket["root"]; Value& nodes = root["nodes"]; @@ -56,7 +57,7 @@ MoleculeJsonLoader::MoleculeJsonLoader(Document& ket) MoleculeJsonLoader::MoleculeJsonLoader(Value& mol_nodes) : _mol_nodes(mol_nodes), _meta_objects(kArrayType), _pmol(0), _pqmol(0), ignore_noncritical_query_features(false), ignore_no_chiral_flag(false), - skip_3d_chirality(false), treat_x_as_pseudoatom(false), treat_stereo_as(0) + skip_3d_chirality(false), treat_x_as_pseudoatom(false), treat_stereo_as(0), components_count(0) { } @@ -672,6 +673,12 @@ void MoleculeJsonLoader::parseBonds(const rapidjson::Value& bonds, BaseMolecule& } } + if (b.HasMember("customQuery")) + { + std::string customQuery = b["customQuery"].GetString(); + // 2do process custom query + } + if (b.HasMember("cip")) { std::string cip = b["cip"].GetString(); @@ -844,11 +851,25 @@ void MoleculeJsonLoader::parseSGroups(const rapidjson::Value& sgroups, BaseMolec for (SizeType i = 0; i < sgroups.Size(); i++) { const Value& s = sgroups[i]; + const Value& atoms = s["atoms"]; std::string sg_type_str = s["type"].GetString(); // GEN, MUL, SRU, SUP + if (sg_type_str == "queryComponent") + { + if (_pqmol) + { + _pqmol->components.expandFill(_pqmol->components.size() + atoms.Size(), 0); + components_count++; + for (int j = 0; j < atoms.Size(); ++j) + { + int atom_idx = atoms[j].GetInt(); + _pqmol->components[atom_idx] = components_count; + } + } + continue; + } int sg_type = SGroup::getType(sg_type_str.c_str()); int grp_idx = mol.sgroups.addSGroup(sg_type); SGroup& sgroup = mol.sgroups.getSGroup(grp_idx); - const Value& atoms = s["atoms"]; // add atoms std::unordered_set sgroup_atoms; for (int j = 0; j < atoms.Size(); ++j) diff --git a/core/indigo-core/molecule/src/molecule_json_saver.cpp b/core/indigo-core/molecule/src/molecule_json_saver.cpp index 5187209be2..3fc0b7bca7 100644 --- a/core/indigo-core/molecule/src/molecule_json_saver.cpp +++ b/core/indigo-core/molecule/src/molecule_json_saver.cpp @@ -149,7 +149,19 @@ void MoleculeJsonSaver::saveSGroups(BaseMolecule& mol, JsonWriter& writer) { QS_DEF(Array, sgs_sorted); _checkSGroupIndices(mol, sgs_sorted); - if (mol.countSGroups() > 0) + int sGroupsCount = mol.countSGroups(); + bool componentDefined = false; + if (mol.isQueryMolecule()) + { + QueryMolecule& qmol = static_cast(mol); + if (qmol.components.size() > 0 && qmol.components[0]) + { + componentDefined = true; + sGroupsCount++; + } + } + + if (sGroupsCount > 0) { writer.Key("sgroups"); writer.StartArray(); @@ -160,6 +172,25 @@ void MoleculeJsonSaver::saveSGroups(BaseMolecule& mol, JsonWriter& writer) auto& sgrp = mol.sgroups.getSGroup(sg_idx); saveSGroup(sgrp, writer); } + // save queryComponent + if (mol.isQueryMolecule() && componentDefined) + { + QueryMolecule& qmol = static_cast(mol); + writer.StartObject(); + writer.Key("type"); + writer.String("queryComponent"); + writer.Key("atoms"); + writer.StartArray(); + for (int i = 0; i < qmol.vertexCount(); i++) + { + if (qmol.components[i]) + { + writer.Int(i); + } + } + writer.EndArray(); + writer.EndObject(); + } writer.EndArray(); } } diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index becf3a019a..967b8fd751 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2180,7 +2180,6 @@ void QueryMolecule::getComponentNeighbors(std::list>& co componentAtoms[componentId].insert(i); } } - componentNeighbors.clear(); for (auto elem : componentAtoms) { auto atoms = elem.second;