From 3bd1ae6c367916e193863cd49ab4cb6f8d8c8103 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:16:41 +0300 Subject: [PATCH] Backmerge: #1533 System attach two explicit hydrogens to atoms connected to "any type" bonds (#1612) (#1708) --- .../ref/arom/arom_d_orbital.py.out | 8 +-- .../integration/ref/basic/fold_unfold.py.out | 59 +++++++++++++++++++ .../integration/ref/basic/validate.py.out | 8 +-- .../integration/tests/basic/fold_unfold.py | 56 ++++++++++++++++++ .../data/reactions/checkreaction/std.json | 2 +- core/indigo-core/molecule/src/molecule.cpp | 2 +- .../molecule/src/query_molecule.cpp | 21 +++++-- 7 files changed, 141 insertions(+), 15 deletions(-) diff --git a/api/tests/integration/ref/arom/arom_d_orbital.py.out b/api/tests/integration/ref/arom/arom_d_orbital.py.out index f3c7cd20cf..692acaaf89 100644 --- a/api/tests/integration/ref/arom/arom_d_orbital.py.out +++ b/api/tests/integration/ref/arom/arom_d_orbital.py.out @@ -1,8 +1,8 @@ *** 0 *** COC(c1[n]2s(c3c(c4c2cccc4)cccc3)(C(C)C)c1C(OC)=O)=O -element: can not calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons +element: cannot calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons COC(c1[n]2s(c3c(c4c2cccc4)cccc3)(C(C)C)c1C(OC)=O)=O -element: can not calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons +element: cannot calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons *** 1 *** COC(C1N2S(C3C(C4C2=CC=CC=4)=CC=CC=3)(C(C)C)C=1C(OC)=O)=O CC(C)S12C(=C(C(=O)OC)N1C1=CC=CC=C1C1=CC=CC=C21)C(=O)OC @@ -10,9 +10,9 @@ COC(C1N2S(C3C(C4C2=CC=CC=4)=CC=CC=3)(C(C)C)C=1C(OC)=O)=O CC(C)S12C(=C(C(=O)OC)N1C1=CC=CC=C1C1=CC=CC=C21)C(=O)OC *** 2 *** COC(C1[n]2s(c3c(c4c2cccc4)cccc3)(C(C)C)C=1C(OC)=O)=O -element: can not calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons +element: cannot 
calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons COC(C1[n]2s(c3c(c4c2cccc4)cccc3)(C(C)C)C=1C(OC)=O)=O -element: can not calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons +element: cannot calculate implicit hydrogens on aromatic S, charge 0, degree 4, 0 radical electrons *** 3 *** COC(c1[n]2s(C3C(C4C2=CC=CC=4)=CC=CC=3)(C(C)C)c1C(OC)=O)=O CC(C)[s]12c(c(C(=O)OC)[n]1C1=CC=CC=C1C1=CC=CC=C21)C(=O)OC diff --git a/api/tests/integration/ref/basic/fold_unfold.py.out b/api/tests/integration/ref/basic/fold_unfold.py.out index 7894040387..ff5db8347c 100644 --- a/api/tests/integration/ref/basic/fold_unfold.py.out +++ b/api/tests/integration/ref/basic/fold_unfold.py.out @@ -1316,3 +1316,62 @@ testing query [2H]C testing query N#CC(C#N)=C1C([H])=C([H])C(=C(C#N)C#N)C([H])=C1[H] |t:4,10| 20 16 +testing query c1ccccc1 +12 +6 +testing query CCC +11 +3 +testing smarts c1ccccc1 +12 +6 +testing smarts CCC +11 +3 +testing smarts C-C-C +11 +3 +testing query + Bond Single or Double 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 5 0 0 0 +M END + +2 +2 +testing query + Bond Single or Aromatic 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 6 0 0 0 +M END + +2 +2 +testing query + Bond Double or Aromatic 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 7 0 0 0 +M END + +2 +2 +testing query + Bond Any 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 8 0 0 0 +M END + +2 +2 diff --git a/api/tests/integration/ref/basic/validate.py.out 
b/api/tests/integration/ref/basic/validate.py.out index 1a0b66f1cf..cb51a8117b 100644 --- a/api/tests/integration/ref/basic/validate.py.out +++ b/api/tests/integration/ref/basic/validate.py.out @@ -1,5 +1,5 @@ ** 0 ** -Exception: element: can not calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons +Exception: element: cannot calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons False Cc1cccc[n]1 @@ -29,11 +29,11 @@ False CC1NC=C(O)N=1 False ** 5 ** -Exception: element: can not calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons +Exception: element: cannot calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons dearomatization: non-unique dearomatization: Dearomatization is not unique. Cannot restore hydrogens. -Exception: element: can not calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons +Exception: element: cannot calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons Exception: dearomatization: non-unique dearomatization: Dearomatization is not unique. Cannot restore hydrogens. 
-Exception: element: can not calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons +Exception: element: cannot calculate implicit hydrogens on aromatic N, charge 0, degree 2, 0 radical electrons ** 6 ** True element: bad valence on H having 1 drawn bonds, charge 1, and 0 radical electrons diff --git a/api/tests/integration/tests/basic/fold_unfold.py b/api/tests/integration/tests/basic/fold_unfold.py index 8bf027cb37..e233565e36 100644 --- a/api/tests/integration/tests/basic/fold_unfold.py +++ b/api/tests/integration/tests/basic/fold_unfold.py @@ -51,6 +51,15 @@ def testFoldUnfoldSingleQueryMol(smiles): print(mol2.countAtoms()) +def testFoldUnfoldSMARTS(smarts): + print("testing smarts " + smarts) + mol = indigo.loadSmarts(smarts) + mol.unfoldHydrogens() + print(mol.countAtoms()) + mol.foldHydrogens() + print(mol.countAtoms()) + + def testFoldUnfoldSingleReaction(smiles): print("testing " + smiles) rxn = indigo.loadReaction(smiles) @@ -103,3 +112,50 @@ def testFoldUnfoldQueryReaction(smiles): testFoldUnfoldSingleQueryMol( "N#CC(C#N)=C1C([H])=C([H])C(=C(C#N)C#N)C([H])=C1[H] |t:4,10|" ) + +testFoldUnfoldSingleQueryMol("c1ccccc1") +testFoldUnfoldSingleQueryMol("CCC") +testFoldUnfoldSMARTS("c1ccccc1") +testFoldUnfoldSMARTS("CCC") +testFoldUnfoldSMARTS("C-C-C") + +mol = """ + Bond Single or Double 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 5 0 0 0 +M END +""" +testFoldUnfoldSingleQueryMol(mol) +mol = """ + Bond Single or Aromatic 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 6 0 0 0 +M END +""" +testFoldUnfoldSingleQueryMol(mol) +mol = """ + Bond Double or Aromatic 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 
-0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 7 0 0 0 +M END +""" +testFoldUnfoldSingleQueryMol(mol) +mol = """ + Bond Any 1252422 22D 1 1.00000 0.00000 0 + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 2.9920 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8580 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 8 0 0 0 +M END +""" +testFoldUnfoldSingleQueryMol(mol) diff --git a/bingo/tests/data/reactions/checkreaction/std.json b/bingo/tests/data/reactions/checkreaction/std.json index fbffb74e7b..4a922dd7d0 100644 --- a/bingo/tests/data/reactions/checkreaction/std.json +++ b/bingo/tests/data/reactions/checkreaction/std.json @@ -1027,7 +1027,7 @@ { "query_id": 206, "query_type": "checkreaction", - "expected": "element: can not calculate implicit hydrogens on aromatic C, charge 0, degree 1, 0 radical electrons" + "expected": "element: cannot calculate implicit hydrogens on aromatic C, charge 0, degree 1, 0 radical electrons" }, { "query_id": 207, diff --git a/core/indigo-core/molecule/src/molecule.cpp b/core/indigo-core/molecule/src/molecule.cpp index d7946398f6..859ea8cded 100644 --- a/core/indigo-core/molecule/src/molecule.cpp +++ b/core/indigo-core/molecule/src/molecule.cpp @@ -821,7 +821,7 @@ int Molecule::_getImplicitHForConnectivity(int idx, int conn, bool use_cache) if (_ignore_bad_valence) impl_h = 0; else - throw Element::Error("can not calculate implicit hydrogens on aromatic %s, charge %d, degree %d, %d radical electrons", + throw Element::Error("cannot calculate implicit hydrogens on aromatic %s, charge %d, degree %d, %d radical electrons", Element::toString(atom.number), atom.charge, getVertex(idx).degree(), Element::radicalElectrons(radical)); } } diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index de8c9dbe90..ad50ea8bf2 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -2434,16 +2434,25 @@ int QueryMolecule::getAtomTotalH(int idx) int 
QueryMolecule::_calcAtomConnectivity(int idx) { const Vertex& vertex = getVertex(idx); - int i, conn = 0; + int i = 0, conn = 0; bool was_aromatic = false; + int atom_aromaticy = -1; + // Smarts treat default bond as SINGLE_OR_AROMATIC so for this bonds look to atom aromaticy + if (original_format == SMARTS) + { + std::ignore = getAtom(idx).sureValue(ATOM_AROMATICITY, atom_aromaticy); + } for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i)) { int order = getBondOrder(vertex.neiEdge(i)); + if (order < 0) + order = getQueryBondType(getBond(vertex.neiEdge(i))); - if (order == BOND_SINGLE || order == BOND_DOUBLE || order == BOND_TRIPLE) + if (order == BOND_SINGLE || order == BOND_DOUBLE || order == BOND_TRIPLE || + (original_format == SMARTS && order == _BOND_SINGLE_OR_AROMATIC && atom_aromaticy == ATOM_ALIPHATIC)) conn += order; - else if (order == BOND_AROMATIC || order == _BOND_SINGLE_OR_AROMATIC || order == _BOND_DOUBLE_OR_AROMATIC) + else if (order == BOND_AROMATIC || (original_format == SMARTS && order == _BOND_SINGLE_OR_AROMATIC && atom_aromaticy == ATOM_AROMATIC)) { conn += 1; if (was_aromatic) @@ -2457,9 +2466,9 @@ int QueryMolecule::_calcAtomConnectivity(int idx) } } else - conn += 1; + return -1; } - if (was_aromatic) + if (was_aromatic) // +1 connection for odd aromatic bond conn += 1; return conn; } @@ -3215,6 +3224,8 @@ int QueryMolecule::getImplicitH(int idx, bool /*impl_h_no_throw*/) // If implicit h is not set - calculate it int max_h = 0; int conn = _calcAtomConnectivity(idx); + if (conn < 0) // can't calculate - no implicit H + return 0; if (properties.count(ATOM_TOTAL_H) > 0) max_h = properties[ATOM_TOTAL_H]->value_min - getAtomConnectedH(idx); else