Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1360 Wrong ordering of atoms in atom list #1368

Merged
merged 6 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@ smarts loaded OK
[!#6!#7!#8] is ok. expected string found in json
#1355 Error appeared at save query molecule with RSite as smarts
Ok expected smarts generated
[!#40!#79!#30]-[#6]-[#6] is ok. smarts_in==smarts_out
[!#40!#79!#30]-[#6]-[#6] is ok. json_in==json_out
[!#40!#79!#30]-[#6]-[#6] is ok. expected string found in json
[#40,#79,#30]-[#6]-[#6] is ok. smarts_in==smarts_out
[#40,#79,#30]-[#6]-[#6] is ok. json_in==json_out
[#40,#79,#30]-[#6]-[#6] is ok. expected string found in json
8 changes: 8 additions & 0 deletions api/tests/integration/tests/formats/smarts.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,11 @@ def test_smarts_load_save_through_ket(smarts_in, expected_str):
print(
"Fail. Expected smarts is\n%s\nbut generated\n%s" % (expected, smarts)
)
smarts = "[!#40!#79!#30]-[#6]-[#6]"
expected = (
'"atoms":[{"type":"atom-list","notList":true,"elements":["Zr","Au","Zn"],'
)
test_smarts_load_save_through_ket(smarts, expected)
smarts = "[#40,#79,#30]-[#6]-[#6]"
expected = '"atoms":[{"type":"atom-list","elements":["Zr","Au","Zn"],'
test_smarts_load_save_through_ket(smarts, expected)
4 changes: 2 additions & 2 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ namespace indigo
void _removeBonds(const Array<int>& indices) override;

using AtomList = std::pair<bool, std::set<int>>;
static bool _isAtomListOr(const Atom* pqa, std::set<int>& list);
static bool _isAtomOrListAndProps(const Atom* pqa, std::set<int>& list, bool& neg, std::map<int, const Atom*>& properties);
static bool _isAtomListOr(const Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(const Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties);
static bool _isAtomList(const Atom* qa, AtomList list);

Array<int> _min_h;
Expand Down
51 changes: 27 additions & 24 deletions core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,20 +2333,20 @@ QueryMolecule::Atom* QueryMolecule::stripKnownAttrs(QueryMolecule::Atom& qa)
return qd == NULL ? &qa : qd;
}

bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::set<int>& list)
bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::vector<int>& list)
{
// Check if p_query_atom atom list like or(a1,a2,a3, or(a4,a5,a6), a7)
if (!p_query_atom)
return false;
if (p_query_atom->type != OP_OR)
return false;
std::set<int> collected;
std::vector<int> collected;
for (auto i = 0; i < p_query_atom->children.size(); i++)
{
Atom* p_query_atom_child = const_cast<Atom*>(p_query_atom)->child(i);
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_max == p_query_atom_child->value_max)
{
collected.insert(p_query_atom_child->value_min);
collected.emplace_back(p_query_atom_child->value_min);
}
else if (p_query_atom_child->type == OP_OR)
{
Expand All @@ -2358,18 +2358,18 @@ bool QueryMolecule::_isAtomListOr(const Atom* p_query_atom, std::set<int>& list)
}
if (collected.size() < 1)
return false;
list.insert(collected.begin(), collected.end());
list = collected;
return true;
}

bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::set<int>& list, bool& neg, std::map<int, const Atom*>& properties)
bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::vector<int>& list, bool& neg, std::map<int, const Atom*>& properties)
{
// Check if p_query_atom contains only atom or atom list and atom properties connected by "and"
// atom list is positive i.e. or(a1,a2,a3,or(a4,a5),a6) or negative
// negative list like is set of op_not(atom_number) or op_not(positevi list), this set connected by "and"
if (!p_query_atom)
return false;
std::set<int> collected;
std::vector<int> collected;
std::map<int, const Atom*> collected_properties;
if (p_query_atom->type != OP_AND)
{
Expand All @@ -2382,7 +2382,7 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::set<int
}
if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)
{
list.insert(p_query_atom_child->value_min);
list.emplace_back(p_query_atom_child->value_min);
neg = is_neg;
return true;
}
Expand All @@ -2394,7 +2394,8 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::set<int
else if (_isAtomListOr(p_query_atom_child, collected))
{
neg = is_neg;
list.insert(collected.begin(), collected.end());
for (auto item : collected)
list.emplace_back(item);
return true;
}
return false;
Expand All @@ -2409,8 +2410,11 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::set<int
if (list.size() > 0 && is_neg != neg) // allowed only one list type in set - positive or negative
return false;
neg = is_neg;
list.insert(collected.begin(), collected.end());
for (auto item : collected)
list.emplace_back(item);
collected.clear();
properties.insert(collected_properties.begin(), collected_properties.end());
collected_properties.clear();
}
else
return false;
Expand All @@ -2420,39 +2424,38 @@ bool QueryMolecule::_isAtomOrListAndProps(const Atom* p_query_atom, std::set<int

int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, const Atom*>& properties)
{
std::set<int> atom_list;
std::vector<int> atom_list;
std::map<int, const Atom*> atom_pros;
bool negative = false;
QueryMolecule::Atom& qa = qm.getAtom(aid);
if (qa.type == QueryMolecule::OP_NONE)
return QUERY_ATOM_AH;
if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_pros))
{
for (auto atom : atom_list)
list.emplace_back(atom);
std::sort(list.begin(), list.end());
list = atom_list;
std::sort(atom_list.begin(), atom_list.end());
properties.insert(atom_pros.begin(), atom_pros.end());

if (negative)
{
if (atom_list.size() == 1 && atom_list.count(ELEM_H) > 0)
if (atom_list.size() == 1 && atom_list[0] == ELEM_H)
return QUERY_ATOM_A; // !H
else if (list == std::vector<int>{ELEM_H, ELEM_C})
else if (atom_list == std::vector<int>{ELEM_H, ELEM_C})
return QUERY_ATOM_Q;
else if (list == std::vector<int>{ELEM_C})
else if (atom_list == std::vector<int>{ELEM_C})
return QUERY_ATOM_QH;
else if (list == std::vector<int>{ELEM_H, ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br,
ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn})
else if (atom_list == std::vector<int>{ELEM_H, ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br,
ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn})
return QUERY_ATOM_M;
else if (list == std::vector<int>{ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br, ELEM_Kr,
ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn})
else if (atom_list == std::vector<int>{ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br,
ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn})
return QUERY_ATOM_MH;
}
else
{
if (list == std::vector<int>{ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At})
if (atom_list == std::vector<int>{ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At})
return QUERY_ATOM_X;
else if (list == std::vector<int>{ELEM_H, ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At})
else if (atom_list == std::vector<int>{ELEM_H, ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At})
return QUERY_ATOM_XH;
}
if (negative)
Expand All @@ -2461,9 +2464,9 @@ int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<
}
else
{
if (list.size() == 0)
if (atom_list.size() == 0)
return QUERY_ATOM_A;
else if (list.size() == 1)
else if (atom_list.size() == 1)
return QUERY_ATOM_SINGLE;
else
return QUERY_ATOM_LIST;
Expand Down
Loading