From c8404da39e795471d7f11dfdf9fcf5fbb51ecc3d Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 10:53:58 -0400 Subject: [PATCH 01/33] Enable fixing more problematic molecules from XYZ Added a few more examples from analyzing the QuantumPioneer data --- rdmc/fix.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 08adbef2..dd537754 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -12,7 +12,7 @@ from rdkit.Chem import rdChemReactions, rdmolops -DEFAULT_REMEDIES = [ +RECOMMEND_REMEDIES = [ # Remedy 1 - Carbon monoxide: [C]=O to [C-]#[O+] rdChemReactions.ReactionFromSmarts( "[O+0-0v2X1:1]=[C+0-0v2X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]" @@ -25,33 +25,96 @@ rdChemReactions.ReactionFromSmarts( "[N+0-0v4X2:1]#[C+0-0v3X1:2]>>[N+v4X2:1]#[C-v3X1:2]" ), - # Remedy 4 - amine radical: RC(R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R + # Remedy 4 - azide: RN=N=[N] to RN=[N+]=[N-] + rdChemReactions.ReactionFromSmarts( + "[N+0-0v3X2:1]=[N+0-0v4X2:2]=[N+0-0v2X1:3]>>[N+0-0v3X2:1]=[N+1v4X2:2]=[N-1v2X1:3]" + ), + # Remedy 5 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] + rdChemReactions.ReactionFromSmarts( + "[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]" + ), + # Remedy 6 - amine radical: RC(R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]" ), - # Remedy 5 - amine radical: RN(R)=C to RN(R)-[C] + # Remedy 7 - amine radical: RN(R)=C to RN(R)-[C] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]" ), - # Remedy 5 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=[C+](R)-[O-] + # Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=[C+](R)-[O-] rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]" ), - # Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] + # Remedy 9 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O) rdChemReactions.ReactionFromSmarts( - "[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]" + "[S+0-0v4X4:1](-[O+0-0v1X1:2])-[O+0-0v1X1:3]>>[S+0-0v6X4:1](=[O+0-0v2X1:2])=[O+0-0v2X1:3]" + ), + # Remedy 10 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1 + rdChemReactions.ReactionFromSmarts( + "[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1" + ">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1" ), - # Remedy 7 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-] +] + + +ZWITTERION_REMEDIES = [ + # Remedy 1 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), - # Remedy 8 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-] + # Remedy 2 - criegee like molecule: RN(R)(R)-[C](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), + # Remedy 3 - ammonium + carboxylic: ([N]R4.C(=O)[O]) to ([N+]R4.C(=O)[O-]) + rdChemReactions.ReactionFromSmarts( + "([N+0-0v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O+0-0v1X1:4])>>([N+1v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O-1v1X1:4])" + ), + # Remedy 4 - ammonium + phosphoric: ([N]R4.P(=O)[O]) to ([N+]R4.P(=O)[O-]) + rdChemReactions.ReactionFromSmarts( + "([N+0-0v4X4:1].[P+0-0v5X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[P+0-0v5X4:2]-[O-1v1X1:3])" + ), + # Remedy 5 - ammonium + sulphuric: ([N]R4.S(=O)(=O)[O]) to ([N+]R4.S(=O)(=O)[O-]) + rdChemReactions.ReactionFromSmarts( + "([N+0-0v4X4:1].[S+0-0v6X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[S+0-0v6X4:2]-[O-1v1X1:3])" + ), + # Remedy 6 - ammonium + carbonyl in ring: ([N]R4.C=O) to ([N+]R4.[C.]-[O-]) + rdChemReactions.ReactionFromSmarts( + "([N+0-0v4X4:1].[C+0-0v4X3R:2]=[O+0-0v2X1:3])>>([N+1v4X4:1].[C+0-0v3X3R:2]-[O-1v1X1:3])" + ), ] +RING_REMEDIES = [ + # The first four elements' sequence matters + # TODO: Find a better solution to avoid the impact of sequence + # Remedy 1 - quintuple C in ring: R1=C(R)=N-R1 to R1=C(R)[N]-R1 + rdChemReactions.ReactionFromSmarts( + "[C+0-0v5X3R:1]=[N+0-0v3X2R:2]>>[C+0-0v4X3R:1]-[N+0-0v2X2R:2]" + ), + # Remedy 2 - quadruple N in ring: R1=N=C(R)R1 to R1=N-[C](R)R1 + rdChemReactions.ReactionFromSmarts( + "[N+0-0v4X2R:1]=[C+0-0v4X3R:2]>>[N+0-0v3X2R:1]-[C+0-0v3X3R:2]" + ), + # Remedy 3 - ring =C(R)=N-[C]: R1=C(R)=N-[C](R)R1 to R1=C(R)-N=C(R)R1 + rdChemReactions.ReactionFromSmarts( + "[C+0-0v5X3R:1]=[N+0-0v3X2R:2]-[C+0-0v3X3:3]>>[C+0-0v4X3R:1]-[N+0-0v3X2R:2]=[C+0-0v4X3:3]" + ), + # Remedy 4 - ring -N-N-: R1-N-N-R1 to R1-N=N-R1 + rdChemReactions.ReactionFromSmarts( + "[N+0-0v2X2R:1]-[N+0-0v2X2R:2]>>[N+0-0v3X2R:1]=[N+0-0v3X2R:2]" + ), + # Remedy 5 - bicyclic radical + rdChemReactions.ReactionFromSmarts( + "[C+0-0v4:1]1[C+0-0v4X4:2]23[C+0-0v4:3][N+0-0v4X4:4]12[C+0-0v4:5]3>>[C+0-0v4:1]1[C+0-0v3X3:2]2[C+0-0v4:3][N+0-0v3X3:4]1[C+0-0v4:5]2" + ), +] + + +DEFAULT_REMEDIES = RECOMMEND_REMEDIES +ALL_REMEDIES = RECOMMEND_REMEDIES + ZWITTERION_REMEDIES + RING_REMEDIES + + def update_product_atom_map_after_reaction( mol: "RDKitMol", ref_mol: "RDKitMol", @@ -104,6 +167,7 @@ def fix_mol_by_remedy( for _ in range(max_attempts): tmp_mol.UpdatePropertyCache(False) + rdmolops.GetSymmSSSR(tmp_mol) try: # Remedy are designed to be unimolecular (group transformation), so the product will be unimolecular as well # If no match, RunReactants will return an empty tuple and thus cause an IndexError. @@ -203,6 +267,7 @@ def fix_mol( sanitize: bool = True, fix_spin_multiplicity: bool = False, mult: int = 1, + renumber_atoms: bool = True, ) -> "RDKitMol": """ Fix the molecule by applying the given remedies and saturating the radical sites to full fill the desired spin multiplicity. @@ -219,6 +284,7 @@ def fix_mol( Defaults to ``False``. mult (int, optional): The desired spin multiplicity. Defaults to ``1``. Only used when ``fix_spin_multiplicity`` is ``True``. + renumber_atoms (bool, optional): Whether to renumber the atoms after the fix. Defaults to ``True``. Returns: RDKitMol: The fixed molecule. @@ -233,4 +299,7 @@ def fix_mol( if fix_spin_multiplicity: mol = fix_mol_spin_multiplicity(mol, mult) + if renumber_atoms: + mol = mol.RenumberAtoms() + return mol From caf4e46e2877c891536afb2c1d5e0626c1386bb6 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 13:52:52 -0400 Subject: [PATCH 02/33] Update the instruction for mol fix recipe --- rdmc/fix.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index dd537754..2abd4de0 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -33,7 +33,7 @@ rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]" ), - # Remedy 6 - amine radical: RC(R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R + # Remedy 6 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]" ), @@ -41,7 +41,7 @@ rdChemReactions.ReactionFromSmarts( "[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]" ), - # Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=[C+](R)-[O-] + # Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O] rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]" ), @@ -62,9 +62,9 @@ rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), - # Remedy 2 - criegee like molecule: RN(R)(R)-[C](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-] + # Remedy 2 - criegee like molecule: R[N+](R)(R)-[C-](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( - "[N+0-0v4X4:1]-[C+0-0v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" + "[N+1v4X4:1]-[C-1v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), # Remedy 3 - ammonium + carboxylic: ([N]R4.C(=O)[O]) to ([N+]R4.C(=O)[O-]) rdChemReactions.ReactionFromSmarts( @@ -270,7 +270,7 @@ def fix_mol( renumber_atoms: bool = True, ) -> "RDKitMol": """ - Fix the molecule by applying the given remedies and saturating the radical sites to full fill the desired spin multiplicity. + Fix the molecule by applying the given remedies and saturating bi-radical or carbene to fix spin multiplicity. Args: mol (RDKitMol): The molecule to be fixed. From ab4b4664b95a978a6724f5b2fff3c3043aed6ec1 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 14:03:48 -0400 Subject: [PATCH 03/33] Enable to generate mol without sanitization from SMILES update property cache is necessary if sanitize=False --- rdmc/mol.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rdmc/mol.py b/rdmc/mol.py index 032cc9d8..5340a617 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -501,6 +501,7 @@ def FromSmiles(cls, # (e.g., [C+:1]#[C:2][C:3]1=[C:7]([H:10])[N-:6][O:5][C:4]1([H:8])[H:9]), # no hydrogens are automatically added. So, we need to add H atoms. if not removeHs and addHs: + mol.UpdatePropertyCache(strict=False) mol = Chem.rdmolops.AddHs(mol) # Create RDKitMol From c7c1a42dfab7b4313c43ce9734dda390d86e3195 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 14:04:05 -0400 Subject: [PATCH 04/33] Add more test case for molecule fixing --- test/test_fix.py | 159 +++++++++++++++-------------------------------- 1 file changed, 51 insertions(+), 108 deletions(-) diff --git a/test/test_fix.py b/test/test_fix.py index ef9c8ff3..85f930c4 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -7,119 +7,62 @@ import pytest from rdmc import RDKitMol -from rdmc.fix import fix_mol - - -@pytest.mark.parametrize("xyz,smiles", [ - ( - """11 - -N -0.6257110000 -0.2593320000 -0.9884930000 -C 0.2688160000 -0.8319990000 0.0754190000 -O 0.3964400000 -2.0228180000 0.1135550000 -C 0.8450220000 0.1995700000 0.9394180000 -C 1.6321610000 -0.1480200000 1.9559930000 -H -0.1237010000 0.4381030000 -1.5829470000 -H -0.9963200000 -1.0439920000 -1.5361970000 -H 0.6035930000 1.2250590000 0.6918340000 -H 1.8315670000 -1.1898640000 2.1796230000 -H 2.0878070000 0.6011050000 2.5908840000 -O -1.6323370000 0.4883040000 -0.4054090000""", - "C=CC(=O)[NH2+][O-]" - ), - ( - """37 - -C 0.0878700000 1.9457110000 3.8222230000 -C 0.3845130000 0.6296420000 3.1095460000 -C 0.9579700000 0.8236020000 1.6869360000 -C 2.2986750000 1.5504340000 1.6697210000 -C 1.0910500000 -0.5190180000 1.0219660000 -O 2.0949340000 -1.0733740000 0.6996620000 -N -0.2130960000 -1.2487480000 0.7300530000 -H -0.3850710000 1.7585110000 4.7872470000 -H -0.5904760000 2.5644870000 3.2307820000 -H 0.9960200000 2.5201880000 4.0052000000 -H -0.5433040000 0.0539610000 3.0561050000 -H 1.0918580000 0.0375120000 3.6988600000 -H 0.2328600000 1.3887070000 1.0988110000 -H 2.6847060000 1.6127630000 0.6536490000 -H 3.0373200000 1.0285160000 2.2805420000 -H 2.1842900000 2.5635430000 2.0520890000 -H -0.5907660000 -1.6620990000 1.5910120000 -H 0.0517760000 -2.0040840000 0.0539540000 -C 1.8317090000 0.6812160000 -2.3476270000 -C 0.9480470000 1.7572160000 -1.8340960000 -C -1.1999620000 -0.4862230000 -0.0458380000 -C -2.2254020000 0.3182860000 0.6649800000 -C -1.2230550000 -0.9919480000 -1.3439180000 -O -0.4618060000 -1.8982750000 -1.7471550000 -O -2.1371840000 -0.4099140000 -2.1755100000 -C -2.1593300000 -0.9149810000 -3.5106800000 -H 2.0987910000 0.8470850000 -3.4039840000 -H 1.3508680000 -0.2970970000 -2.2970340000 -H 2.7770430000 0.6305950000 -1.7998070000 -H -0.1255720000 1.6853060000 -1.9370840000 -H 1.3607280000 2.7011440000 -1.5006770000 -H -2.6977030000 -0.2332810000 1.4943930000 -H -3.0142990000 0.5713670000 -0.0419550000 -H -1.8625980000 1.2646920000 1.0872720000 -H -2.9200460000 -0.3349620000 -4.0297310000 -H -2.4170940000 -1.9752960000 -3.5264770000 -H -1.1899930000 -0.7837590000 -3.9943770000 -""", - "CCC(C)C(=O)[NH2+][C-](C)C(=O)OC.[CH2]C" - ), - ( - """26 - -C -0.36408100 0.98431600 -3.37195000 -C -0.03350700 -0.39420800 -2.87710200 -C -1.05010500 -1.47569700 -3.07352400 -C 2.29463400 -0.90065000 -3.70766700 -N 1.24568900 -0.76369000 -3.18038600 -H -1.22236000 1.41742000 -2.85803300 -H -0.62425500 0.89488200 -4.43452700 -H 0.48105600 1.66021200 -3.26957600 -H -2.00329000 -1.19413600 -2.62552200 -H -0.71793600 -2.41900800 -2.64331400 -H -1.20797300 -1.63004700 -4.14867900 -C -1.70593000 2.03178400 1.45065100 -C -1.82890000 1.69736300 -0.02415500 -O -1.44824900 0.32941900 -0.26785800 -C -0.11554300 0.07180800 -0.29980900 -O 0.72367100 0.93118700 -0.34867400 -C 0.14105500 -1.36513700 -0.04726300 -C 1.38042000 -1.83093000 0.02565700 -H -2.05838100 3.04983100 1.62919500 -H -0.66751400 1.96935100 1.77496700 -H -2.30742700 1.34867400 2.05221000 -H -1.20984900 2.36194100 -0.62487000 -H -2.86297900 1.76965300 -0.35874400 -H -0.72723600 -1.98853700 0.12036700 -H 2.21712800 -1.18644400 -0.20841200 -H 1.58563500 -2.86115500 0.28392800""", - "C=CC(=O)OCC.[C-]#[N+][C](C)C", - ) -]) -def test_fix_mol_from_xyz(xyz, smiles): - - mol = RDKitMol.FromXYZ(xyz, backend="openbabel", sanitize=False) - +from rdmc.fix import ALL_REMEDIES, fix_mol + + +@pytest.mark.parametrize( + "smi, exp_smi", + [ + ("O=O", "[O][O]"), + ("[C]=O", "[C-]#[O+]"), + ("CS(C)([O])[O]", "CS(C)(=O)=O"), + ], +) +def test_fix_sanitize_ok(smi, exp_smi): + mol = RDKitMol.FromSmiles(smi) + assert fix_mol(mol, ALL_REMEDIES).ToSmiles() == exp_smi + + +@pytest.mark.parametrize( + "smi, exp_smi", + [ + ("[NH3][O]", "[NH3+][O-]"), + ("[CH2][NH3]", "[CH2-][NH3+]"), + ("[C]#[NH]", "[C-]#[NH+]"), + ("[N]=N=N", "[N-]=[N+]=N"), + ("[CH2]=[NH2]", "[CH2]N"), + ("O=[CH]=O", "[O]C=O"), + ("[CH]1=N=[CH]=N=[CH]=N=1", "c1ncncn1"), + ("[NH3][CH]=O", "[NH3+][CH][O-]"), + ("[NH3][CH][O]", "[NH3+][CH][O-]"), + ("[NH3]CC(=O)[O]", "[NH3+]CC(=O)[O-]"), + ("[NH3]CS(=O)(=O)[O]", "[NH3+]CS(=O)(=O)[O-]"), + ("[NH3]CP(=O)([O])O", "[NH3+]CP(=O)([O-])O"), + ], +) +def test_fix_sanitize_bad_non_resonance(smi, exp_smi): + mol = RDKitMol.FromSmiles(smi, sanitize=False) with pytest.raises(Exception): mol.Sanitize() + assert fix_mol(mol, ALL_REMEDIES).ToSmiles() == exp_smi - assert set(fix_mol(mol).ToSmiles().split(".")) \ - == set(smiles.split(".")) +def test_fix_mol_complex(): + mol = RDKitMol.FromSmiles("O=O.[C]=O") + assert set(fix_mol(mol, ALL_REMEDIES).ToSmiles().split(".")) == set( + ["[O][O]", "[C-]#[O+]"] + ) -def test_fix_o2(): - - mol = RDKitMol.FromSmiles("O=O") - assert fix_mol(mol).ToSmiles() == "[O][O]" +def test_fix_spin_multiplicity(): + mol = RDKitMol.FromSmiles("[CH2][CH2]") + assert fix_mol(mol, fix_spin_multiplicity=True, mult=1).GetSpinMultiplicity() == 1 -def test_fix_co(): - mol = RDKitMol.FromSmiles("[C]=O") - assert fix_mol(mol).ToSmiles() == "[C-]#[O+]" +def test_renumber_after_fix(): + mol = RDKitMol.FromSmiles("[H:1][C:2]([H:3])[N:4]#[C:5]", sanitize=False) + mol_fix = fix_mol(mol.Copy(quickCopy=True), renumber_atoms=False) + assert mol.GetAtomMapNumbers() != mol_fix.GetAtomMapNumbers() + mol_fix = fix_mol(mol.Copy(quickCopy=True), renumber_atoms=True) + assert mol.GetAtomMapNumbers() == mol_fix.GetAtomMapNumbers() + assert mol.GetAtomicNumbers() == mol_fix.GetAtomicNumbers() From afbd44275bd998f82807a431653aab8a12d64bf9 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 14:16:10 -0400 Subject: [PATCH 05/33] Add an adaptive spin multiplicity fix 1. also make the warning message for saturate mol silent --- rdmc/fix.py | 13 +++++++++---- rdmc/mol.py | 3 ++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 2abd4de0..4339547f 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -166,8 +166,8 @@ def fix_mol_by_remedy( fix_flag = False for _ in range(max_attempts): - tmp_mol.UpdatePropertyCache(False) - rdmolops.GetSymmSSSR(tmp_mol) + tmp_mol.UpdatePropertyCache(False) # Update connectivity + rdmolops.GetSymmSSSR(tmp_mol) # Update ring information try: # Remedy are designed to be unimolecular (group transformation), so the product will be unimolecular as well # If no match, RunReactants will return an empty tuple and thus cause an IndexError. @@ -266,7 +266,7 @@ def fix_mol( max_attempts: int = 10, sanitize: bool = True, fix_spin_multiplicity: bool = False, - mult: int = 1, + mult: int = 0, renumber_atoms: bool = True, ) -> "RDKitMol": """ @@ -282,9 +282,11 @@ def fix_mol( sanitize (bool, optional): Whether to sanitize the molecule after the fix. Defaults to ``True``. fix_spin_multiplicity (bool, optional): Whether to fix the spin multiplicity of the molecule. Defaults to ``False``. - mult (int, optional): The desired spin multiplicity. Defaults to ``1``. + mult (int, optional): The desired spin multiplicity. Defaults to ``0``, which means the lowest possible + spin multiplicity will be inferred from the number of unpaired electrons. Only used when ``fix_spin_multiplicity`` is ``True``. renumber_atoms (bool, optional): Whether to renumber the atoms after the fix. Defaults to ``True``. + Turn this off when the atom map number is not important. Returns: RDKitMol: The fixed molecule. @@ -297,6 +299,9 @@ def fix_mol( ) if fix_spin_multiplicity: + if mult == 0: + # Infer the possible lowest spin multiplicity from the number of unpaired electrons + mult = 1 if mol.GetSpinMultiplicity() % 2 else 2 mol = fix_mol_spin_multiplicity(mol, mult) if renumber_atoms: diff --git a/rdmc/mol.py b/rdmc/mol.py index 5340a617..8317e5f8 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1840,7 +1840,8 @@ def SaturateMol(self, chain_length=chain_length, verbose=verbose) self.SaturateCarbene(multiplicity=multiplicity, verbose=verbose) - if self.GetSpinMultiplicity() != multiplicity: + if verbose and self.GetSpinMultiplicity() != multiplicity: + # TODO: make print a log print('SaturateMol fails after trying all methods and you need to be cautious about the generated mol.') def SetVdwMatrix(self, From ceabe0da243a126e643a09f04a2740a600bb9f6b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 14:46:24 -0400 Subject: [PATCH 06/33] Update format in utils.py --- rdmc/utils.py | 231 +++++++++++++++++++++++++++++--------------------- 1 file changed, 133 insertions(+), 98 deletions(-) diff --git a/rdmc/utils.py b/rdmc/utils.py index b818e2f5..bed085e3 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -13,6 +13,7 @@ from rdkit import RDLogger from rdkit.Chem import AllChem from rdkit.Chem.rdchem import BondType, Mol, RWMol + # Since 2022.09.1, RDKit added built-in XYZ parser using xyz2mol approach try: from rdkit.Chem import rdDetermineBonds @@ -35,32 +36,43 @@ import openbabel as ob # Bond order dictionary for RDKit, numbers are the bond order. -ORDERS = {1: BondType.SINGLE, 2: BondType.DOUBLE, 3: BondType.TRIPLE, 1.5: BondType.AROMATIC, - 4: BondType.QUADRUPLE, - 'S': BondType.SINGLE, 'D': BondType.DOUBLE, 'T': BondType.TRIPLE, 'B': BondType.AROMATIC, - 'Q': BondType.QUADRUPLE} +ORDERS = { + 1: BondType.SINGLE, + 2: BondType.DOUBLE, + 3: BondType.TRIPLE, + 1.5: BondType.AROMATIC, + 4: BondType.QUADRUPLE, + "S": BondType.SINGLE, + "D": BondType.DOUBLE, + "T": BondType.TRIPLE, + "B": BondType.AROMATIC, + "Q": BondType.QUADRUPLE, +} # The rotational bond definition in RDkit # It is the same as rdkit.Chem.Lipinski import RotatableBondSmarts -ROTATABLE_BOND_SMARTS = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]') -ROTATABLE_BOND_SMARTS_WO_METHYL = Chem.MolFromSmarts('[!$(*#*)&!D1!H3]-&!@[!$(*#*)&!D1&!H3]') +ROTATABLE_BOND_SMARTS = Chem.MolFromSmarts("[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]") +ROTATABLE_BOND_SMARTS_WO_METHYL = Chem.MolFromSmarts( + "[!$(*#*)&!D1!H3]-&!@[!$(*#*)&!D1&!H3]" +) # When perceiving molecules, openbabel will always perceive carbon monoxide as [C]=O # Needs to correct it by [C-]#[O+] CO_OPENBABEL_PATTERN = ob.OBSmartsPattern() -CO_OPENBABEL_PATTERN.Init('[Cv2X1]=[OX1]') +CO_OPENBABEL_PATTERN.Init("[Cv2X1]=[OX1]") # Carbene, nitrene, and atomic oxygen templates. RDKit and Openbabel have difficulty # distinguish their multiplicity when input as SMILES or XYZ -CARBENE_PATTERN = Chem.MolFromSmarts('[Cv0,Cv1,Cv2,Nv0,Nv1,Ov0]') +CARBENE_PATTERN = Chem.MolFromSmarts("[Cv0,Cv1,Cv2,Nv0,Nv1,Ov0]") PERIODIC_TABLE = Chem.GetPeriodicTable() VDW_RADII = {i: PERIODIC_TABLE.GetRvdw(i) for i in range(1, 36)} -def determine_smallest_atom_index_in_torsion(atom1: 'rdkit.Chem.rdchem.Atom', - atom2: 'rdkit.Chem.rdchem.Atom', - ) -> int: +def determine_smallest_atom_index_in_torsion( + atom1: "rdkit.Chem.rdchem.Atom", + atom2: "rdkit.Chem.rdchem.Atom", +) -> int: """ Determine the smallest atom index in mol connected to ``atom1`` which is not ``atom2``. Returns a heavy atom if available, otherwise a hydrogen atom. @@ -74,8 +86,7 @@ def determine_smallest_atom_index_in_torsion(atom1: 'rdkit.Chem.rdchem.Atom', Returns: int: The smallest atom index (1-indexed) connected to ``atom1`` which is not ``atom2``. """ - neighbor = [a for a in atom1.GetNeighbors() if a.GetIdx() - != atom2.GetIdx()] + neighbor = [a for a in atom1.GetNeighbors() if a.GetIdx() != atom2.GetIdx()] atomic_num_list = sorted([nb.GetAtomicNum() for nb in neighbor]) min_atomic, max_atomic = atomic_num_list[0], atomic_num_list[-1] if min_atomic == max_atomic or min_atomic > 1: @@ -84,9 +95,10 @@ def determine_smallest_atom_index_in_torsion(atom1: 'rdkit.Chem.rdchem.Atom', return min([nb.GetIdx() for nb in neighbor if nb.GetAtomicNum() != 1]) -def find_internal_torsions(mol: Union['Mol', 'RWMol'], - exclude_methyl: bool = False, - ) -> list: +def find_internal_torsions( + mol: Union["Mol", "RWMol"], + exclude_methyl: bool = False, +) -> list: """ Find the internal torsions from RDkit molecule. @@ -98,8 +110,9 @@ def find_internal_torsions(mol: Union['Mol', 'RWMol'], list: A list of internal torsions. """ torsions = list() - smarts = ROTATABLE_BOND_SMARTS if not exclude_methyl \ - else ROTATABLE_BOND_SMARTS_WO_METHYL + smarts = ( + ROTATABLE_BOND_SMARTS if not exclude_methyl else ROTATABLE_BOND_SMARTS_WO_METHYL + ) rot_atom_pairs = mol.GetSubstructMatches(smarts) for atoms_ind in rot_atom_pairs: @@ -111,7 +124,7 @@ def find_internal_torsions(mol: Union['Mol', 'RWMol'], return torsions -def find_ring_torsions(mol: Union['Mol', 'RWMol']) -> list: +def find_ring_torsions(mol: Union["Mol", "RWMol"]) -> list: """ Find the ring from RDkit molecule. @@ -130,11 +143,12 @@ def find_ring_torsions(mol: Union['Mol', 'RWMol']) -> list: return ring_torsions -def openbabel_mol_to_rdkit_mol(obmol: 'openbabel.OBMol', - remove_hs: bool = False, - sanitize: bool = True, - embed: bool = True, - ) -> 'RWMol': +def openbabel_mol_to_rdkit_mol( + obmol: "openbabel.OBMol", + remove_hs: bool = False, + sanitize: bool = True, + embed: bool = True, +) -> "RWMol": """ Convert a OpenBabel molecular structure to a Chem.rdchem.RWMol object. Args: @@ -185,15 +199,18 @@ def openbabel_mol_to_rdkit_mol(obmol: 'openbabel.OBMol', # If OBMol has 3D information, it can be embed to the RDKit Mol if embed and (obmol.HasNonZeroCoords() or obmol.NumAtoms() == 1): coords = get_obmol_coords(obmol) - conf = Chem.rdchem.Conformer(coords.shape[0]) # Create a conformer that has number of atoms specified + conf = Chem.rdchem.Conformer( + coords.shape[0] + ) # Create a conformer that has number of atoms specified set_rdconf_coordinates(conf, coords) rw_mol.AddConformer(conf, assignId=True) return rw_mol -def rdkit_mol_to_openbabel_mol(rdmol: Union['Mol', 'RWMol'], - embed: bool = True, - ) -> 'openbabel.OBMol': +def rdkit_mol_to_openbabel_mol( + rdmol: Union["Mol", "RWMol"], + embed: bool = True, +) -> "openbabel.OBMol": """ Convert a Mol/RWMol to a Openbabel mol. This a temporary replace of ``rdkit_mol_to_openbabel_mol_manual``. @@ -212,7 +229,7 @@ def rdkit_mol_to_openbabel_mol(rdmol: Union['Mol', 'RWMol'], # RDKit Mol or RWMol sdf_str = Chem.MolToMolBlock(rdmol) obconv = ob.OBConversion() - obconv.SetInFormat('sdf') + obconv.SetInFormat("sdf") obmol = ob.OBMol() obconv.ReadString(obmol, sdf_str) @@ -239,9 +256,10 @@ def rdkit_mol_to_openbabel_mol(rdmol: Union['Mol', 'RWMol'], return obmol -def rdkit_mol_to_openbabel_mol_manual(rdmol: Union['Mol', 'RWMol'], - embed: bool = True, - ) -> 'openbabel.OBMol': +def rdkit_mol_to_openbabel_mol_manual( + rdmol: Union["Mol", "RWMol"], + embed: bool = True, +) -> "openbabel.OBMol": """ Convert a Mol/RWMol to a Openbabel mol. This function has a problem converting aromatic molecules. Example: 'c1nc[nH]n1'. Currently use a workaround, converting an @@ -262,11 +280,13 @@ def rdkit_mol_to_openbabel_mol_manual(rdmol: Union['Mol', 'RWMol'], if isotope != 0: obatom.SetIsotope(isotope) obatom.SetFormalCharge(rdatom.GetFormalCharge()) - bond_type_dict = {BondType.SINGLE: 1, - BondType.DOUBLE: 2, - BondType.TRIPLE: 3, - BondType.QUADRUPLE: 4, - BondType.AROMATIC: 5} + bond_type_dict = { + BondType.SINGLE: 1, + BondType.DOUBLE: 2, + BondType.TRIPLE: 3, + BondType.QUADRUPLE: 4, + BondType.AROMATIC: 5, + } for bond in rdmol.GetBonds(): atom1_idx = bond.GetBeginAtomIdx() + 1 atom2_idx = bond.GetEndAtomIdx() + 1 @@ -292,10 +312,11 @@ def rdkit_mol_to_openbabel_mol_manual(rdmol: Union['Mol', 'RWMol'], return obmol -def rmg_mol_to_rdkit_mol(rmgmol: 'rmgpy.molecule.Molecule', - remove_hs: bool = False, - sanitize: bool = True, - ) -> 'RWMol': +def rmg_mol_to_rdkit_mol( + rmgmol: "rmgpy.molecule.Molecule", + remove_hs: bool = False, + sanitize: bool = True, +) -> "RWMol": """ Convert a RMG molecular structure to an RDKit Mol object. Uses `RDKit `_ to perform the conversion. @@ -331,8 +352,11 @@ def rmg_mol_to_rdkit_mol(rmgmol: 'rmgpy.molecule.Molecule', # Avoid `SanitizeMol` adding undesired hydrogens rd_atom.SetNoImplicit(True) else: - explicit_Hs = [True for a, b in rmg_atom.edges.items() - if a.is_hydrogen() and b.is_single()] + explicit_Hs = [ + True + for a, b in rmg_atom.edges.items() + if a.is_hydrogen() and b.is_single() + ] rd_atom.SetNumExplicitHs(sum(explicit_Hs)) rd_atom.SetNoImplicit(True) rd_atom.SetNumRadicalElectrons(rmg_atom.radical_electrons) @@ -343,11 +367,13 @@ def rmg_mol_to_rdkit_mol(rmgmol: 'rmgpy.molecule.Molecule', # For other atoms, to be added once encountered if rmg_atom.is_carbon() and rmg_atom.lone_pairs >= 1 and not rmg_atom.charge: reset_num_electron[i] = rmg_atom.radical_electrons - elif rmg_atom.is_nitrogen() and rmg_atom.lone_pairs >= 2 and not rmg_atom.charge: + elif ( + rmg_atom.is_nitrogen() and rmg_atom.lone_pairs >= 2 and not rmg_atom.charge + ): reset_num_electron[i] = rmg_atom.radical_electrons elif rmg_atom.is_oxygen and rmg_atom.lone_pairs >= 3 and not rmg_atom.charge: reset_num_electron[i] = rmg_atom.radical_electrons - if not (remove_hs and rmg_atom.symbol == 'H'): + if not (remove_hs and rmg_atom.symbol == "H"): rwmol.AddAtom(rd_atom) # Add the bonds @@ -363,7 +389,8 @@ def rmg_mol_to_rdkit_mol(rmgmol: 'rmgpy.molecule.Molecule', rwmol.AddBond( atom_id_map[atom1.id], atom_id_map[atom2.id], - ORDERS[bond12.get_order_str()]) + ORDERS[bond12.get_order_str()], + ) # Rectify the molecule if remove_hs: @@ -377,8 +404,9 @@ def rmg_mol_to_rdkit_mol(rmgmol: 'rmgpy.molecule.Molecule', return rwmol -def set_rdconf_coordinates(conf: Union['Conformer', 'RDKitConf'], - coords: Union[tuple, list, np.ndarray]): +def set_rdconf_coordinates( + conf: Union["Conformer", "RDKitConf"], coords: Union[tuple, list, np.ndarray] +): """ Set the Positions of atoms of the conformer. @@ -416,8 +444,7 @@ def get_obmol_coords(obmol: ob.OBMol): return np.array(coords) -def set_obmol_coords(obmol: ob.OBMol, - coords: np.array): +def set_obmol_coords(obmol: ob.OBMol, coords: np.array): """ Get the atom coordinates from an openbabel molecule. If all coordinates are zero, It will return None @@ -430,8 +457,7 @@ def set_obmol_coords(obmol: ob.OBMol, atom.SetVector(ob.vector3(*coords[atom_idx].tolist())) -def fix_CO_openbabel(obmol: 'Openbabel.OBMol', - correct_CO: bool = True): +def fix_CO_openbabel(obmol: "Openbabel.OBMol", correct_CO: bool = True): """ Fix the CO perception issue for openbabel molecule. @@ -454,8 +480,7 @@ def fix_CO_openbabel(obmol: 'Openbabel.OBMol', atom.SetFormalCharge(+1) -def parse_xyz_by_openbabel(xyz: str, - correct_CO: bool = True): +def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True): """ Perceive a xyz str using openbabel and generate the corresponding OBMol. @@ -469,11 +494,11 @@ def parse_xyz_by_openbabel(xyz: str, ob.OBMol: An openbabel molecule from the xyz """ obconversion = ob.OBConversion() - obconversion.SetInFormat('xyz') + obconversion.SetInFormat("xyz") obmol = ob.OBMol() success = obconversion.ReadString(obmol, xyz) if not success: - raise ValueError('Unable to parse the provided xyz.') + raise ValueError("Unable to parse the provided xyz.") # Temporary Fix for Issue # 1 # This function works okay with openbabel 2.4.1 but not 3.1.1 @@ -491,7 +516,10 @@ def parse_xyz_by_openbabel(xyz: str, elif obatom.GetAtomicNum() == 8 and obatom.GetTotalValence() < 2: obatom.SetSpinMultiplicity(3 - obatom.GetTotalValence()) # Find the unsaturated nitrogen and halogen - elif obatom.GetAtomicNum() in [1, 9, 17, 35, 53] and obatom.GetTotalValence() == 0: + elif ( + obatom.GetAtomicNum() in [1, 9, 17, 35, 53] + and obatom.GetTotalValence() == 0 + ): obatom.SetSpinMultiplicity(2) # Correct [C]=O to [C-]#[O+] @@ -524,9 +552,10 @@ def get_atom_masses(atom_nums: Iterable): return [PERIODIC_TABLE.GetAtomicWeight(int(atom_num)) for atom_num in atom_nums] -def get_internal_coords(obmol, - nonredundant: bool = True, - ) -> list: +def get_internal_coords( + obmol, + nonredundant: bool = True, +) -> list: """ Generate a non_redundant_internal coordinate. @@ -535,9 +564,9 @@ def get_internal_coords(obmol, nonredundant (bool): whether non-redundant. Defaults to ``True``. """ obconv = ob.OBConversion() - obconv.SetOutFormat('gzmat') + obconv.SetOutFormat("gzmat") gzmat_str = obconv.WriteString(obmol) - lines = gzmat_str.split('Variables:')[0].splitlines()[6:] + lines = gzmat_str.split("Variables:")[0].splitlines()[6:] bonds = [] angles = [] torsions = [] @@ -562,8 +591,7 @@ def get_internal_coords(obmol, return bonds, angles, torsions -def reverse_map(map: Iterable, - as_list: bool = True): +def reverse_map(map: Iterable, as_list: bool = True): """ Inverse-transform the index and value relationship in a mapping. E.g., when doing a subgraph match, RDKit will returns a list @@ -586,16 +614,17 @@ def reverse_map(map: Iterable, return np.argsort(map) -def parse_xyz_by_jensen(xyz: str, - charge: int = 0, - allow_charged_fragments: bool = False, - use_huckel: bool = False, - embed_chiral: bool = True, - correct_CO: bool = True, - use_atom_maps: bool = False, - force_rdmc: bool = False, - **kwargs, - ) -> 'Mol': +def parse_xyz_by_jensen( + xyz: str, + charge: int = 0, + allow_charged_fragments: bool = False, + use_huckel: bool = False, + embed_chiral: bool = True, + correct_CO: bool = True, + use_atom_maps: bool = False, + force_rdmc: bool = False, + **kwargs, +) -> "Mol": """ Perceive a xyz str using `xyz2mol` by Jensen et al. and generate the corresponding RDKit Mol. The implementation refers the following blog: https://greglandrum.github.io/rdkit-blog/posts/2022-12-18-introducing-rdDetermineBonds.html @@ -620,24 +649,25 @@ def parse_xyz_by_jensen(xyz: str, """ # Version < 2022.09.1 if rdDetermineBonds is None or force_rdmc: - return parse_xyz_by_jensen_rdmc(xyz=xyz, - charge=charge, - allow_charged_fragments=allow_charged_fragments, - use_graph=True, - use_huckel=use_huckel, - embed_chiral=embed_chiral, - use_atom_maps=use_atom_maps, - correct_CO=correct_CO, - ) + return parse_xyz_by_jensen_rdmc( + xyz=xyz, + charge=charge, + allow_charged_fragments=allow_charged_fragments, + use_graph=True, + use_huckel=use_huckel, + embed_chiral=embed_chiral, + use_atom_maps=use_atom_maps, + correct_CO=correct_CO, + ) # Version >= 2022.09.1 try: mol = Chem.Mol(Chem.MolFromXYZBlock(xyz)) except BaseException: - raise ValueError('Unable to parse the provided xyz.') + raise ValueError("Unable to parse the provided xyz.") else: if mol is None: - raise ValueError('Unable to parse the provided xyz.') + raise ValueError("Unable to parse the provided xyz.") if mol.GetNumAtoms() == 1: atom = mol.GetAtomWithIdx(0) # No implicit Hs for single atom molecule @@ -649,20 +679,25 @@ def parse_xyz_by_jensen(xyz: str, # Set the num radical electrons atom.SetNumRadicalElectrons(valence - atom.GetFormalCharge()) return mol - rdDetermineBonds.DetermineConnectivity(mol, - useHueckel=use_huckel, - charge=charge,) + rdDetermineBonds.DetermineConnectivity( + mol, + useHueckel=use_huckel, + charge=charge, + ) # A force correction for CO - if correct_CO \ - and mol.GetNumAtoms() == 2 \ - and {atom.GetAtomicNum() for atom in mol.GetAtoms()} == {6, 8}: + if ( + correct_CO + and mol.GetNumAtoms() == 2 + and {atom.GetAtomicNum() for atom in mol.GetAtoms()} == {6, 8} + ): allow_charged_fragments = True - rdDetermineBonds.DetermineBondOrders(mol, - charge=charge, - allowChargedFragments=allow_charged_fragments, - embedChiral=embed_chiral, - useAtomMap=use_atom_maps, - ) + rdDetermineBonds.DetermineBondOrders( + mol, + charge=charge, + allowChargedFragments=allow_charged_fragments, + embedChiral=embed_chiral, + useAtomMap=use_atom_maps, + ) return mol @@ -779,5 +814,5 @@ def parse_xyz_by_jensen(xyz: str, "Mc": (1.00, 0.02, 0.00), "Lv": (1.00, 0.06, 0.00), "Ts": (1.00, 0.10, 0.00), - "Og": (1.00, 0.16, 0.00) + "Og": (1.00, 0.16, 0.00), } From e3ec3ecede2d0bd5ec630d430ddd8e96a65e9110 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 15:36:10 -0400 Subject: [PATCH 07/33] add saturate radical to utils --- rdmc/utils.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/rdmc/utils.py b/rdmc/utils.py index bed085e3..1ebabcac 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -701,6 +701,43 @@ def parse_xyz_by_jensen( return mol +def get_closed_shell_cheap(mol: "RWMol") -> "RWMol": + """ + Get the closed shell molecule of a radical molecule. This is a cheap version + where no new atom is actually added to the molecule and all operation is inplace. + + Args: + mol (RWMol): The radical molecule. + + Returns: + RWMol: The closed shell molecule. + """ + for atom in mol.GetAtoms(): + if atom.GetNumRadicalElectrons(): + atom.SetNumRadicalElectrons(0) + atom.SetNoImplicit(False) + return mol + + +def get_closed_shell_by_add_hs( + mol: "RWMol", +) -> "RWMol": + """ + Get the closed shell molecule of a radical molecule by explicitly adding + hydrogen atoms to the molecule. + """ + atom_idx = mol.GetNumAtoms() + for atom in mol.GetAtoms(): + num_rad_elecs = atom.GetNumRadicalElectrons() + if num_rad_elecs: + for _ in range(num_rad_elecs): + mol.AddAtom(Chem.rdchem.Atom(1)) + mol.AddBond(atom_idx, atom.GetIdx(), Chem.rdchem.BondType.SINGLE) + atom_idx += 1 + atom.SetNumRadicalElectrons(0) + return mol + + # CPK (Corey-Pauling-Koltun) color scheme, Generated using ChatGPT CPK_COLOR_PALETTE = { "H": (1.00, 1.00, 1.00), From 6f8429a906cbe998032f68f72283d0c064045f0c Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 15:18:27 -0400 Subject: [PATCH 08/33] Add HasSameConnectivity and GetClosedShellMol HasSameConnectivity is a helper function to compare two molecule's adjacency matrix. Rename the one comparing conformer connectivity to HasSameConnectivityConformer; GetClosedShellMol is a helper function to get the closed shell form of a radical --- rdmc/mol.py | 69 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index 8317e5f8..37df21aa 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1098,11 +1098,27 @@ def HasCollidingAtoms(self, # if the distance is smaller than a threshold, the atom has a high chance of colliding return not np.all(self.GetVdwMatrix(threshold=threshold) <= dist_mat) - def HasSameConnectivity(self, - confId: int = 0, - backend: str = 'openbabel', - **kwargs, - ) -> bool: + def HasSameConnectivity( + self, + refmol: "RDKitMol", + ) -> bool: + """ + Check wheter the molecule has the same connectivity as the reference molecule. + + Args: + refmol (RDKitMol): The reference molecule. + + Returns: + bool: Whether the molecule has the same connectivity as the reference molecule. + """ + return np.array_equal(self.GetAdjacencyMatrix(), refmol.GetAdjacencyMatrix()) + + def HasSameConnectivityConformer( + self, + confId: int = 0, + backend: str = "openbabel", + **kwargs, + ) -> bool: """ Check whether the conformer of the molecule (defined by its spacial coordinates) as the same connectivity as the molecule. @@ -1123,20 +1139,18 @@ def HasSameConnectivity(self, # Sanitization is not applied to account for # special cases like zwitterionic molecules # or molecule complexes - new_mol = RDKitMol.FromXYZ(xyz_str, - header=True, - backend=backend, - sanitize=False, - **kwargs) + new_mol = RDKitMol.FromXYZ( + xyz_str, header=True, backend=backend, sanitize=False, **kwargs + ) except Exception as exc: # Error in preserving the molecule - print(f'Error in preserving the molecule: {exc}') + print(f"Error in preserving the molecule: {exc}") traceback.print_exc() return False else: conf_adj_mat = new_mol.GetAdjacencyMatrix() - return (mol_adj_mat == conf_adj_mat).all() + return np.array_equal(mol_adj_mat, conf_adj_mat) def Kekulize(self, clearAromaticFlags: bool = False): @@ -1844,6 +1858,37 @@ def SaturateMol(self, # TODO: make print a log print('SaturateMol fails after trying all methods and you need to be cautious about the generated mol.') + def GetClosedShellMol(self, + cheap: bool = False, + sanitize: bool = True, + ) -> 'RDKitMol': + """ + Get a closed shell molecule by removing all radical electrons and adding + H atoms to these radical sites. This method currently only work for radicals + and will not work properly for singlet radicals. + + Args: + cheap (bool): Whether to use a cheap method where H atoms are only implicitly added. + Defaults to ``False``. Setting it to ``False`` only when the molecule + is immediately used for generating SMILES/InChI and other representations, + and no further manipulation is needed. Otherwise, it may be confusing as + the hydrogen atoms will not appear in the list of atoms, not display in the + 2D graph, etc. + sanitize (bool): Whether to sanitize the molecule. Defaults to ``True``. + + Returns: + RDKitMol: A closed shell molecule. + """ + mol_copy = self.Copy(quickCopy=True) + if cheap: + mol_copy = get_closed_shell_cheap(mol_copy) + else: + mol_copy = get_closed_shell_by_add_hs(mol_copy) + mol_copy.SetAtomMapNumbers() + if sanitize: + mol_copy.Sanitize() + return mol_copy + def SetVdwMatrix(self, threshold: float = 0.4, vdw_radii: dict = VDW_RADII): From ab2440830afbbc46d1428c0f7d34327addba1581 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 15:35:52 -0400 Subject: [PATCH 09/33] Update mol.py format --- rdmc/mol.py | 916 +++++++++++++++++++++++++++++----------------------- 1 file changed, 515 insertions(+), 401 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index 37df21aa..f65a6be4 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -40,9 +40,7 @@ # Keep the representation method from rdchem.Mol -KEEP_RDMOL_ATTRIBUTES = ['_repr_html_', - '_repr_png_', - '_repr_svg_'] +KEEP_RDMOL_ATTRIBUTES = ["_repr_html_", "_repr_png_", "_repr_svg_"] class RDKitMol(object): @@ -53,9 +51,7 @@ class RDKitMol(object): shortcuts so that users don't need to refer to other RDKit modules. """ - def __init__(self, - mol: Union[Mol, RWMol], - keepAtomMap: bool = True): + def __init__(self, mol: Union[Mol, RWMol], keepAtomMap: bool = True): """ Generate an ``RDKitMol`` molecule object instance from a RDKit ``Chem.rdchem.Mol`` or ``RWMol`` molecule. @@ -93,10 +89,11 @@ def __init__(self, # Perceive rings self.GetSymmSSSR() - def AddNullConformer(self, - confId: Optional[int] = None, - random: bool = True, - ) -> None: + def AddNullConformer( + self, + confId: Optional[int] = None, + random: bool = True, + ) -> None: """ Embed a conformer with atoms' coordinates of random numbers or with all atoms located at the origin to the current `RDKitMol`. @@ -115,9 +112,10 @@ def AddNullConformer(self, conf.SetId(confId) self._mol.AddConformer(conf) - def AddRedundantBonds(self, - bonds: Iterable, - ) -> 'RDKitMol': + def AddRedundantBonds( + self, + bonds: Iterable, + ) -> "RDKitMol": """ Add redundant bonds (not originally exist in the molecule) for facilitating a few molecule operation or analyses. This function @@ -132,16 +130,17 @@ def AddRedundantBonds(self, mol_cp.GetSymmSSSR() return mol_cp - def AlignMol(self, - prbMol: Union['RDKitMol', 'RWMol', 'Mol'] = None, - refMol: Union['RDKitMol', 'RWMol', 'Mol'] = None, - prbCid: int = 0, - refCid: int = 0, - reflect: bool = False, - atomMaps: Optional[list] = None, - maxIters: int = 1000, - weights: list = [], - ) -> float: + def AlignMol( + self, + prbMol: Union["RDKitMol", "RWMol", "Mol"] = None, + refMol: Union["RDKitMol", "RWMol", "Mol"] = None, + prbCid: int = 0, + refCid: int = 0, + reflect: bool = False, + atomMaps: Optional[list] = None, + maxIters: int = 1000, + weights: list = [], + ) -> float: """ Align molecules based on a reference molecule. This function will also return the RMSD value for the best alignment. When leaving both ``prbMol`` and ``refMol`` blank, the function will align current molecule's conformers, and @@ -163,10 +162,14 @@ def AlignMol(self, float: RMSD value. """ if prbMol is not None and refMol is not None: - raise ValueError('`refMol` and `prbMol` should not be provided simultaneously.') + raise ValueError( + "`refMol` and `prbMol` should not be provided simultaneously." + ) elif prbMol is None and refMol is None and prbCid == refCid: - raise ValueError('Cannot match the same conformer for the given molecule. `prbCid` and `refCid` needs' - 'to be different if either `prbMol` or `refMol` is not provided.') + raise ValueError( + "Cannot match the same conformer for the given molecule. `prbCid` and `refCid` needs" + "to be different if either `prbMol` or `refMol` is not provided." + ) refMol = refMol or self prbMol = prbMol or self @@ -177,29 +180,31 @@ def AlignMol(self, prbMol.Reflect(id=prbCid) rmsd = np.inf for atom_map in atomMaps: - cur_rmsd = Chem.rdMolAlign.AlignMol(refMol=refMol._mol, - prbMol=prbMol._mol, - prbCid=prbCid, - refCid=refCid, - atomMap=atom_map, - reflect=reflect, - maxIters=maxIters, - weights=weights, - ) + cur_rmsd = Chem.rdMolAlign.AlignMol( + refMol=refMol._mol, + prbMol=prbMol._mol, + prbCid=prbCid, + refCid=refCid, + atomMap=atom_map, + reflect=reflect, + maxIters=maxIters, + weights=weights, + ) if cur_rmsd < rmsd: rmsd = cur_rmsd if reflect: prbMol.Reflect(id=prbCid) return rmsd - def CalcRMSD(self, - prbMol: 'RDKitMol', - prbCid: int = 0, - refCid: int = 0, - reflect: bool = False, - atomMaps: Optional[list] = None, - weights: list = [], - ) -> float: + def CalcRMSD( + self, + prbMol: "RDKitMol", + prbCid: int = 0, + refCid: int = 0, + reflect: bool = False, + atomMaps: Optional[list] = None, + weights: list = [], + ) -> float: """ Calculate the RMSD between conformers of two molecules. Note this function will not align conformers, thus molecules' geometries are not translated or rotated during the calculation. You can expect a larger number compared to the RMSD from :func:`~RDKitMol.AlignMol`. @@ -223,21 +228,23 @@ def CalcRMSD(self, if reflect: prbMol.Reflect(id=prbCid) try: - rmsd = Chem.rdMolAlign.CalcRMS(refMol=self._mol, - prbMol=prbMol.ToRWMol(), - prbId=prbCid, - refId=refCid, - map=atomMaps, - weights=weights, - ) + rmsd = Chem.rdMolAlign.CalcRMS( + refMol=self._mol, + prbMol=prbMol.ToRWMol(), + prbId=prbCid, + refId=refCid, + map=atomMaps, + weights=weights, + ) except AttributeError: - raise NotImplementedError('The RDKit version used doesn\'t support this calculation.') + raise NotImplementedError( + "The RDKit version used doesn't support this calculation." + ) if reflect: prbMol.Reflect(id=prbCid) return rmsd - def AssignStereochemistryFrom3D(self, - confId: int = 0): + def AssignStereochemistryFrom3D(self, confId: int = 0): """ Assign the chirality type to a molecule's atoms. @@ -246,11 +253,12 @@ def AssignStereochemistryFrom3D(self, """ Chem.rdmolops.AssignStereochemistryFrom3D(self._mol, confId=confId) - def CombineMol(self, - molFrag: Union['RDKitMol', 'Mol'], - offset: Union[list, tuple, float, np.ndarray] = 0, - c_product: bool = False, - ) -> 'RDKitMol': + def CombineMol( + self, + molFrag: Union["RDKitMol", "Mol"], + offset: Union[list, tuple, float, np.ndarray] = 0, + c_product: bool = False, + ) -> "RDKitMol": """ Combine the current molecule with the given ``molFrag`` (another molecule or fragment). A new object instance will be created and changes are not made to the current molecule. @@ -286,10 +294,10 @@ def CombineMol(self, if isinstance(offset, (list, tuple)): offset = np.array(offset) elif isinstance(offset, (int, float)): - offset = np.array([[offset, 0., 0.]]) + offset = np.array([[offset, 0.0, 0.0]]) else: if isinstance(offset, (list, tuple, np.ndarray)): - for i, coord in enumerate(['x', 'y', 'z']): + for i, coord in enumerate(["x", "y", "z"]): setattr(vector, coord, float(offset[0, i])) else: vector.x = float(offset) @@ -298,8 +306,10 @@ def CombineMol(self, if c_product: c1s = m1.GetConformers() c2s = m2.GetConformers() - pos_list = [[c1.GetPositions(), c2.GetPositions() + offset] - for c1, c2 in cartesian_product(c1s, c2s)] + pos_list = [ + [c1.GetPositions(), c2.GetPositions() + offset] + for c1, c2 in cartesian_product(c1s, c2s) + ] combined.EmbedMultipleNullConfs(len(c1s) * len(c2s), random=False) for i, pos_pair in enumerate(pos_list): @@ -307,11 +317,12 @@ def CombineMol(self, return combined - def Copy(self, - quickCopy: bool = False, - confId: int = -1, - copy_attrs: Optional[list] = None, - ) -> 'RDKitMol': + def Copy( + self, + quickCopy: bool = False, + confId: int = -1, + copy_attrs: Optional[list] = None, + ) -> "RDKitMol": """ Make a copy of the current ``RDKitMol``. @@ -329,9 +340,7 @@ def Copy(self, setattr(new_mol, attr, copy.deepcopy(getattr(self, attr))) return new_mol - def EmbedConformer(self, - embed_null: bool = True, - **kwargs): + def EmbedConformer(self, embed_null: bool = True, **kwargs): """ Embed a conformer to the ``RDKitMol``. This will overwrite current conformers. By default, it will first try embedding a 3D conformer; if fails, it then try to compute 2D coordinates @@ -357,12 +366,9 @@ def EmbedConformer(self, if embed_null: self.EmbedNullConformer() else: - raise RuntimeError('Cannot embed conformer for this molecule!') + raise RuntimeError("Cannot embed conformer for this molecule!") - def EmbedMultipleConfs(self, - n: int = 1, - embed_null: bool = True, - **kwargs): + def EmbedMultipleConfs(self, n: int = 1, embed_null: bool = True, **kwargs): """ Embed multiple conformers to the ``RDKitMol``. This will overwrite current conformers. By default, it will first try embedding a 3D conformer; if fails, it then try to compute 2D coordinates @@ -384,10 +390,9 @@ def EmbedMultipleConfs(self, if embed_null: results = self.EmbedMultipleNullConfs(n=n) else: - raise RuntimeError('Cannot embed conformer for this molecule!') + raise RuntimeError("Cannot embed conformer for this molecule!") - def EmbedNullConformer(self, - random: bool = True): + def EmbedNullConformer(self, random: bool = True): """ Embed a conformer with null or random atom coordinates. This helps the cases where a conformer can not be successfully embedded. You can choose to generate all zero coordinates or random coordinates. @@ -401,9 +406,7 @@ def EmbedNullConformer(self, """ self.EmbedMultipleNullConfs(n=1, random=random) - def EmbedMultipleNullConfs(self, - n: int = 10, - random: bool = True): + def EmbedMultipleNullConfs(self, n: int = 10, random: bool = True): """ Embed conformers with null or random atom coordinates. This helps the cases where a conformer can not be successfully embedded. You can choose to generate all zero coordinates or random coordinates. @@ -420,18 +423,21 @@ def EmbedMultipleNullConfs(self, num_atoms = self.GetNumAtoms() for i in range(n): conf = Conformer() - coords = np.random.rand(num_atoms, 3) if random else np.zeros((num_atoms, 3)) + coords = ( + np.random.rand(num_atoms, 3) if random else np.zeros((num_atoms, 3)) + ) set_rdconf_coordinates(conf, coords) conf.SetId(i) self._mol.AddConformer(conf) - @ classmethod - def FromOBMol(cls, - obMol: 'openbabel.OBMol', - removeHs: bool = False, - sanitize: bool = True, - embed: bool = True, - ) -> 'RDKitMol': + @classmethod + def FromOBMol( + cls, + obMol: "openbabel.OBMol", + removeHs: bool = False, + sanitize: bool = True, + embed: bool = True, + ) -> "RDKitMol": """ Convert a OpenBabel Mol to an RDKitMol object. @@ -448,10 +454,11 @@ def FromOBMol(cls, return cls(rw_mol) @classmethod - def FromMol(cls, - mol: Union[Mol, RWMol], - keepAtomMap: bool = True, - ) -> 'RDKitMol': + def FromMol( + cls, + mol: Union[Mol, RWMol], + keepAtomMap: bool = True, + ) -> "RDKitMol": """ Convert a RDKit ``Chem.rdchem.Mol`` molecule to ``RDKitMol`` Molecule. @@ -467,14 +474,15 @@ def FromMol(cls, return cls(mol, keepAtomMap=keepAtomMap) @classmethod - def FromSmiles(cls, - smiles: str, - removeHs: bool = False, - addHs: bool = True, - sanitize: bool = True, - allowCXSMILES: bool = True, - keepAtomMap: bool = True, - ) -> 'RDKitMol': + def FromSmiles( + cls, + smiles: str, + removeHs: bool = False, + addHs: bool = True, + sanitize: bool = True, + allowCXSMILES: bool = True, + keepAtomMap: bool = True, + ) -> "RDKitMol": """ Convert a SMILES string to an ``RDkitMol`` object. @@ -524,9 +532,10 @@ def FromSmiles(cls, return mol @classmethod - def FromSmarts(cls, - smarts: str, - ) -> 'RDKitMol': + def FromSmarts( + cls, + smarts: str, + ) -> "RDKitMol": """ Convert a SMARTS to an ``RDKitMol`` object. @@ -540,12 +549,13 @@ def FromSmarts(cls, return cls(mol) @classmethod - def FromInchi(cls, - inchi: str, - removeHs: bool = False, - addHs: bool = True, - sanitize: bool = True, - ): + def FromInchi( + cls, + inchi: str, + removeHs: bool = False, + addHs: bool = True, + sanitize: bool = True, + ): """ Construct an ``RDKitMol`` object from a InChI string. @@ -560,19 +570,18 @@ def FromInchi(cls, Returns: RDKitMol: An RDKit molecule object corresponding to the InChI. """ - mol = Chem.inchi.MolFromInchi(inchi, - sanitize=sanitize, - removeHs=removeHs) + mol = Chem.inchi.MolFromInchi(inchi, sanitize=sanitize, removeHs=removeHs) if not removeHs and addHs: mol = Chem.rdmolops.AddHs(mol) return cls(mol) @classmethod - def FromRMGMol(cls, - rmgMol: 'rmgpy.molecule.Molecule', - removeHs: bool = False, - sanitize: bool = True, - ) -> 'RDKitMol': + def FromRMGMol( + cls, + rmgMol: "rmgpy.molecule.Molecule", + removeHs: bool = False, + sanitize: bool = True, + ) -> "RDKitMol": """ Convert an RMG ``Molecule`` to an ``RDkitMol`` object. @@ -584,19 +593,19 @@ def FromRMGMol(cls, Returns: RDKitMol: An RDKit molecule object corresponding to the RMG Molecule. """ - return cls(rmg_mol_to_rdkit_mol(rmgMol, - removeHs, - sanitize)) + return cls(rmg_mol_to_rdkit_mol(rmgMol, removeHs, sanitize)) @classmethod - def FromXYZ(cls, - xyz: str, - backend: str = 'openbabel', - header: bool = True, - correctCO: bool = True, - sanitize: bool = True, - embed_chiral: bool = False, - **kwargs): + def FromXYZ( + cls, + xyz: str, + backend: str = "openbabel", + header: bool = True, + correctCO: bool = True, + sanitize: bool = True, + embed_chiral: bool = False, + **kwargs, + ): """ Convert xyz string to RDKitMol. @@ -628,7 +637,7 @@ def FromXYZ(cls, xyz = f"{len(xyz.splitlines())}\n\n{xyz}" # Openbabel support read xyz and perceive atom connectivities - if backend.lower() == 'openbabel': + if backend.lower() == "openbabel": obmol = parse_xyz_by_openbabel(xyz, correct_CO=correctCO) rdmol = cls.FromOBMol(obmol, sanitize=sanitize) if embed_chiral: @@ -637,23 +646,25 @@ def FromXYZ(cls, # https://github.com/jensengroup/xyz2mol/blob/master/xyz2mol.py # provides an approach to convert xyz to mol - elif backend.lower() == 'jensen': - mol = parse_xyz_by_jensen(xyz, - correct_CO=correctCO, - embed_chiral=embed_chiral, - **kwargs) + elif backend.lower() == "jensen": + mol = parse_xyz_by_jensen( + xyz, correct_CO=correctCO, embed_chiral=embed_chiral, **kwargs + ) return cls(mol) else: - raise NotImplementedError(f'Backend ({backend}) is not supported. Only `openbabel` and `jensen`' - f' are supported.') + raise NotImplementedError( + f"Backend ({backend}) is not supported. Only `openbabel` and `jensen`" + f" are supported." + ) @classmethod - def FromSDF(cls, - sdf: str, - removeHs: bool = False, - sanitize: bool = True, - ) -> 'RDKitMol': + def FromSDF( + cls, + sdf: str, + removeHs: bool = False, + sanitize: bool = True, + ) -> "RDKitMol": """ Convert an SDF string to RDKitMol. @@ -670,16 +681,17 @@ def FromSDF(cls, return cls(mol) @classmethod - def FromFile(cls, - path: str, - backend: str = 'openbabel', - header: bool = True, - correctCO: bool = True, - removeHs: bool = False, - sanitize: bool = True, - sameMol: bool = False, - **kwargs - ) -> 'RDKitMol': + def FromFile( + cls, + path: str, + backend: str = "openbabel", + header: bool = True, + correctCO: bool = True, + removeHs: bool = False, + sanitize: bool = True, + sameMol: bool = False, + **kwargs, + ) -> "RDKitMol": """ Read RDKitMol from a file. @@ -703,7 +715,14 @@ def FromFile(cls, if extension == ".xyz": with open(path, "r") as f: xyz = f.read() - return cls.FromXYZ(xyz, backend=backend, header=header, correctCO=correctCO, sanitize=sanitize, **kwargs) + return cls.FromXYZ( + xyz, + backend=backend, + header=header, + correctCO=correctCO, + sanitize=sanitize, + **kwargs, + ) # use rdkit's sdf reader to read in multiple mols elif extension == ".sdf": @@ -712,15 +731,18 @@ def FromFile(cls, if sameMol: new_mol = mols[0].Copy(quickCopy=True) - [new_mol.AddConformer(m.GetConformer().ToConformer(), assignId=True) for m in mols] + for m in mols: + new_mol.AddConformer(m.GetConformer().ToConformer(), assignId=True) return new_mol else: return mols else: - raise NotImplementedError(f'Extension ({extension}) is not supported. Only `.xyz` and `.sdf`' - f' are supported.') + raise NotImplementedError( + f"Extension ({extension}) is not supported. Only `.xyz` and `.sdf`" + f" are supported." + ) def GetAdjacencyMatrix(self): """ @@ -748,13 +770,15 @@ def GetAtomicNumbers(self): """ return [atom.GetAtomicNum() for atom in self.GetAtoms()] - def GetBestAlign(self, - refMol, - prbCid: int = 0, - refCid: int = 0, - atomMaps: Optional[list] = None, - maxIters: int = 1000, - keepBestConformer: bool = True): + def GetBestAlign( + self, + refMol, + prbCid: int = 0, + refCid: int = 0, + atomMaps: Optional[list] = None, + maxIters: int = 1000, + keepBestConformer: bool = True, + ): """ This is a wrapper function for calling ``AlignMol`` twice, with ``reflect`` to ``True`` and ``False``, respectively. @@ -777,19 +801,23 @@ def GetBestAlign(self, bool: if reflected conformer gives a better result. """ # Align without refection - rmsd = self.AlignMol(refMol=refMol, - prbCid=prbCid, - refCid=refCid, - atomMaps=atomMaps, - maxIters=maxIters) + rmsd = self.AlignMol( + refMol=refMol, + prbCid=prbCid, + refCid=refCid, + atomMaps=atomMaps, + maxIters=maxIters, + ) # Align with refection - rmsd_r = self.AlignMol(refMol=refMol, - prbCid=prbCid, - refCid=refCid, - atomMaps=atomMaps, - maxIters=maxIters, - reflect=True) + rmsd_r = self.AlignMol( + refMol=refMol, + prbCid=prbCid, + refCid=refCid, + atomMaps=atomMaps, + maxIters=maxIters, + reflect=True, + ) # The conformation is reflected, now reflect back self.Reflect(id=prbCid) @@ -798,12 +826,14 @@ def GetBestAlign(self, # Make sure the resulted conformer is the one with the lowest RMSD if keepBestConformer: - rmsd = self.AlignMol(refMol=refMol, - prbCid=prbCid, - refCid=refCid, - atomMaps=atomMaps, - maxIters=maxIters, - reflect=reflect,) + rmsd = self.AlignMol( + refMol=refMol, + prbCid=prbCid, + refCid=refCid, + atomMaps=atomMaps, + maxIters=maxIters, + reflect=reflect, + ) else: rmsd = rmsd if rmsd <= rmsd_r else rmsd_r @@ -816,7 +846,10 @@ def GetBondsAsTuples(self) -> List[tuple]: Returns: list: A list of length-2 sets indicating the bonding atoms. """ - return [tuple(sorted((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))) for bond in self.GetBonds()] + return [ + tuple(sorted((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))) + for bond in self.GetBonds() + ] def GetElementSymbols(self) -> List[str]: """ @@ -845,9 +878,10 @@ def GetAtomMasses(self) -> List[float]: """ return get_atom_masses(self.GetAtomicNumbers()) - def GetConformer(self, - id: int = 0, - ) -> 'RDKitConf': + def GetConformer( + self, + id: int = 0, + ) -> "RDKitConf": """ Get the embedded conformer according to ID. @@ -868,9 +902,10 @@ def GetConformer(self, rdkitconf.SetOwningMol(self) return rdkitconf - def GetConformers(self, - ids: Union[list, tuple] = [0], - ) -> List['RDKitConf']: + def GetConformers( + self, + ids: Union[list, tuple] = [0], + ) -> List["RDKitConf"]: """ Get the embedded conformers according to IDs. @@ -894,7 +929,7 @@ def GetConformers(self, conformers.append(conformer) return conformers - def GetAllConformers(self) -> List['RDKitConf']: + def GetAllConformers(self) -> List["RDKitConf"]: """ Get all of the embedded conformers. @@ -916,12 +951,13 @@ def GetDistanceMatrix(self, id: int = 0) -> np.ndarray: """ return Chem.rdmolops.Get3DDistanceMatrix(self._mol, confId=id) - def GetFingerprint(self, - fpType: str = 'morgan', - numBits: int = 2048, - count: bool = False, - **kwargs, - ) -> np.ndarray: + def GetFingerprint( + self, + fpType: str = "morgan", + numBits: int = 2048, + count: bool = False, + **kwargs, + ) -> np.ndarray: """ Get the fingerprint of the molecule. @@ -933,11 +969,9 @@ def GetFingerprint(self, Returns: np.ndarray: A fingerprint of the molecule. """ - return get_fingerprint(self, - fp_type=fpType, - num_bits=numBits, - count=count, - **kwargs) + return get_fingerprint( + self, fp_type=fpType, num_bits=numBits, count=count, **kwargs + ) def GetPositions(self, id: int = 0) -> np.ndarray: """ @@ -962,11 +996,12 @@ def GetSymmSSSR(self) -> tuple: """ return Chem.GetSymmSSSR(self._mol) - def GetSubstructMatch(self, - query: Union['RDKitMol', 'RWMol', 'Mol'], - useChirality: bool = False, - useQueryQueryMatches: bool = False - ) -> tuple: + def GetSubstructMatch( + self, + query: Union["RDKitMol", "RWMol", "Mol"], + useChirality: bool = False, + useQueryQueryMatches: bool = False, + ) -> tuple: """ Returns the indices of the molecule's atoms that match a substructure query. @@ -979,17 +1014,22 @@ def GetSubstructMatch(self, tuple: A tuple of matched indices. """ try: - return self._mol.GetSubstructMatch(query.ToRWMol(), useChirality, useQueryQueryMatches) + return self._mol.GetSubstructMatch( + query.ToRWMol(), useChirality, useQueryQueryMatches + ) except AttributeError: - return self._mol.GetSubstructMatch(query, useChirality, useQueryQueryMatches) + return self._mol.GetSubstructMatch( + query, useChirality, useQueryQueryMatches + ) - def GetSubstructMatches(self, - query: Union['RDKitMol', 'RWMol', 'Mol'], - uniquify: bool = True, - useChirality: bool = False, - useQueryQueryMatches: bool = False, - maxMatches: int = 1000, - ) -> tuple: + def GetSubstructMatches( + self, + query: Union["RDKitMol", "RWMol", "Mol"], + uniquify: bool = True, + useChirality: bool = False, + useQueryQueryMatches: bool = False, + maxMatches: int = 1000, + ) -> tuple: """ Returns tuples of the indices of the molecule's atoms that match a substructure query. @@ -1005,18 +1045,25 @@ def GetSubstructMatches(self, tuple: A tuple of tuples of matched indices. """ try: - return self._mol.GetSubstructMatches(query.ToRWMol(), uniquify, useChirality, - useQueryQueryMatches, maxMatches) + return self._mol.GetSubstructMatches( + query.ToRWMol(), + uniquify, + useChirality, + useQueryQueryMatches, + maxMatches, + ) except AttributeError: - return self._mol.GetSubstructMatches(query, uniquify, useChirality, - useQueryQueryMatches, maxMatches) + return self._mol.GetSubstructMatches( + query, uniquify, useChirality, useQueryQueryMatches, maxMatches + ) - def GetMolFrags(self, - asMols: bool = False, - sanitize: bool = True, - frags: Optional[list] = None, - fragsMolAtomMapping: Optional[list] = None, - ) -> tuple: + def GetMolFrags( + self, + asMols: bool = False, + sanitize: bool = True, + frags: Optional[list] = None, + fragsMolAtomMapping: Optional[list] = None, + ) -> tuple: """ Finds the disconnected fragments from a molecule. For example, for the molecule "CC(=O)[O-].[NH3+]C", this function will split the molecules into a list of "CC(=O)[O-]" and "[NH3+]C". By defaults, @@ -1036,16 +1083,22 @@ def GetMolFrags(self, Returns: tuple: a tuple of atom mapping or a tuple of split molecules (RDKitMol). """ - frags = Chem.rdmolops.GetMolFrags(self._mol, asMols=asMols, sanitizeFrags=sanitize, - frags=frags, fragsMolAtomMapping=fragsMolAtomMapping) + frags = Chem.rdmolops.GetMolFrags( + self._mol, + asMols=asMols, + sanitizeFrags=sanitize, + frags=frags, + fragsMolAtomMapping=fragsMolAtomMapping, + ) if asMols: return tuple(RDKitMol(mol) for mol in frags) return frags - def GetTorsionalModes(self, - excludeMethyl: bool = False, - includeRings: bool = False, - ) -> list: + def GetTorsionalModes( + self, + excludeMethyl: bool = False, + includeRings: bool = False, + ) -> list: """ Get all of the torsional modes (rotors) from the molecule. @@ -1061,9 +1114,10 @@ def GetTorsionalModes(self, torsions += find_ring_torsions(self._mol) return torsions - def GetVdwMatrix(self, - threshold: float = 0.4, - ) -> Optional[np.ndarray]: + def GetVdwMatrix( + self, + threshold: float = 0.4, + ) -> Optional[np.ndarray]: """ Get the derived Van der Waals matrix, which can be used to analyze the collision of atoms. More information can be found from ``generate_vdw_mat``. @@ -1081,9 +1135,10 @@ def GetVdwMatrix(self, self.SetVdwMatrix(threshold=threshold) return self._vdw_mat - def HasCollidingAtoms(self, - threshold: float = 0.4, - ) -> bool: + def HasCollidingAtoms( + self, + threshold: float = 0.4, + ) -> bool: """ Check whether the molecule has colliding atoms. @@ -1152,8 +1207,7 @@ def HasSameConnectivityConformer( return np.array_equal(mol_adj_mat, conf_adj_mat) - def Kekulize(self, - clearAromaticFlags: bool = False): + def Kekulize(self, clearAromaticFlags: bool = False): """ Kekulizes the molecule. @@ -1163,10 +1217,11 @@ def Kekulize(self, """ Chem.KekulizeIfPossible(self._mol, clearAromaticFlags=clearAromaticFlags) - def PrepareOutputMol(self, - removeHs: bool = False, - sanitize: bool = True, - ) -> Mol: + def PrepareOutputMol( + self, + removeHs: bool = False, + sanitize: bool = True, + ) -> Mol: """ Generate a RDKit Mol instance for output purpose, to ensure that the original molecule is not modified. @@ -1194,8 +1249,7 @@ def PrepareOutputMol(self, Chem.rdmolops.SanitizeMol(mol) # mol is modified in place return mol - def RemoveHs(self, - sanitize: bool = True): + def RemoveHs(self, sanitize: bool = True): """ Remove H atoms. Useful when trying to match heavy atoms. @@ -1204,10 +1258,11 @@ def RemoveHs(self, """ return Chem.rdmolops.RemoveHs(self._mol, sanitize=sanitize) - def RenumberAtoms(self, - newOrder: Optional[list] = None, - updateAtomMap: bool = True, - ) -> 'RDKitMol': + def RenumberAtoms( + self, + newOrder: Optional[list] = None, + updateAtomMap: bool = True, + ) -> "RDKitMol": """ Return a new copy of RDKitMol that has atom (index) reordered. @@ -1228,15 +1283,22 @@ def RenumberAtoms(self, try: rwmol = Chem.rdmolops.RenumberAtoms(self._mol, newOrder) except RuntimeError: - raise ValueError(f'The input newOrder ({newOrder}) is invalid. If no newOrder is provided' - f', it may due to the atoms doesn\'t have atom map numbers.') + raise ValueError( + f"The input newOrder ({newOrder}) is invalid. If no newOrder is provided" + f", it may due to the atoms doesn't have atom map numbers." + ) # Correct the AtomMapNum if updateAtomMap: - [rwmol.GetAtomWithIdx(i).SetAtomMapNum(i + 1) for i in range(rwmol.GetNumAtoms())] + [ + rwmol.GetAtomWithIdx(i).SetAtomMapNum(i + 1) + for i in range(rwmol.GetNumAtoms()) + ] return RDKitMol(rwmol) - def Sanitize(self, - sanitizeOps: Optional[Union[int, 'SanitizeFlags']] = Chem.rdmolops.SANITIZE_ALL): + def Sanitize( + self, + sanitizeOps: Optional[Union[int, "SanitizeFlags"]] = Chem.rdmolops.SANITIZE_ALL, + ): """ Sanitize the molecule. @@ -1247,8 +1309,7 @@ def Sanitize(self, """ Chem.rdmolops.SanitizeMol(self._mol, sanitizeOps) - def SetAtomMapNumbers(self, - atomMap: Optional[Sequence[int]] = None): + def SetAtomMapNumbers(self, atomMap: Optional[Sequence[int]] = None): """ Set the atom mapping number. By defaults, atom indexes are used. It can be helpful when plotting the molecule in a 2D graph. @@ -1259,7 +1320,9 @@ def SetAtomMapNumbers(self, num_atoms = self.GetNumAtoms() if atomMap is not None: if len(atomMap) != num_atoms: - raise ValueError('Invalid atomMap provided. It should have the same length as atom numbers.') + raise ValueError( + "Invalid atomMap provided. It should have the same length as atom numbers." + ) else: # Set a atom map numbers based on the order of atom index # As suggested by the developer of RDKit @@ -1279,25 +1342,25 @@ def GetAtomMapNumbers(self) -> tuple: """ return tuple(atom.GetAtomMapNum() for atom in self.GetAtoms()) - def Reflect(self, - id: int = 0): + def Reflect(self, id: int = 0): """ Reflect the atom coordinates of a molecule, and therefore its mirror image. Args: id (int, optional): The conformer id to reflect. Defaults to ``0``. """ - Chem.rdMolAlign.AlignMol(refMol=self._mol, - prbMol=self._mol, - prbCid=id, - refCid=0, - reflect=True, - maxIters=0,) + Chem.rdMolAlign.AlignMol( + refMol=self._mol, + prbMol=self._mol, + prbCid=id, + refCid=0, + reflect=True, + maxIters=0, + ) - def SetPositions(self, - coords: Union[Sequence, str], - id: int = 0, - header: bool = False): + def SetPositions( + self, coords: Union[Sequence, str], id: int = 0, header: bool = False + ): """ Set the atom positions to one of the conformer. @@ -1309,7 +1372,12 @@ def SetPositions(self, """ if isinstance(coords, str): xyz_lines = coords.splitlines()[2:] if header else coords.splitlines() - coords = np.array([[float(atom) for atom in line.strip().split()[1:]] for line in xyz_lines]) + coords = np.array( + [ + [float(atom) for atom in line.strip().split()[1:]] + for line in xyz_lines + ] + ) try: conf = self.GetConformer(id=id) @@ -1324,7 +1392,7 @@ def SetPositions(self, raise conf.SetPositions(np.array(coords, dtype=float)) - def ToOBMol(self) -> 'openbabel.OBMol': + def ToOBMol(self) -> "openbabel.OBMol": """ Convert ``RDKitMol`` to a ``OBMol``. @@ -1355,13 +1423,14 @@ def ToSDFFile(self, path: str, confId: int = -1): writer.write(self._mol, confId=confId) writer.close() - def ToSmiles(self, - stereo: bool = True, - kekule: bool = False, - canonical: bool = True, - removeAtomMap: bool = True, - removeHs: bool = True, - ) -> str: + def ToSmiles( + self, + stereo: bool = True, + kekule: bool = False, + canonical: bool = True, + removeAtomMap: bool = True, + removeHs: bool = True, + ) -> str: """ Convert RDKitMol to a SMILES string. @@ -1382,14 +1451,14 @@ def ToSmiles(self, for atom in mol.GetAtoms(): atom.SetAtomMapNum(0) - return Chem.rdmolfiles.MolToSmiles(mol, - isomericSmiles=stereo, - kekuleSmiles=kekule, - canonical=canonical) + return Chem.rdmolfiles.MolToSmiles( + mol, isomericSmiles=stereo, kekuleSmiles=kekule, canonical=canonical + ) - def ToInchi(self, - options: str = "", - ) -> str: + def ToInchi( + self, + options: str = "", + ) -> str: """ Convert the RDKitMol to a InChI string using RDKit builtin converter. @@ -1401,11 +1470,12 @@ def ToInchi(self, """ return Chem.rdinchi.MolToInchi(self._mol, options=options)[0] - def ToXYZ(self, - confId: int = -1, - header: bool = True, - comment: str = '', - ) -> str: + def ToXYZ( + self, + confId: int = -1, + header: bool = True, + comment: str = "", + ) -> str: """ Convert ``RDKitMol`` to a xyz string. @@ -1419,14 +1489,19 @@ def ToXYZ(self, """ xyz = Chem.MolToXYZBlock(self._mol, confId) if not header: - xyz = '\n'.join(xyz.splitlines()[2:]) + '\n' + xyz = "\n".join(xyz.splitlines()[2:]) + "\n" elif comment: - xyz = f'{self.GetNumAtoms()}\n{comment}\n' + '\n'.join(xyz.splitlines()[2:]) + '\n' + xyz = ( + f"{self.GetNumAtoms()}\n{comment}\n" + + "\n".join(xyz.splitlines()[2:]) + + "\n" + ) return xyz - def ToMolBlock(self, - confId: int = -1, - ) -> str: + def ToMolBlock( + self, + confId: int = -1, + ) -> str: """ Convert ``RDKitMol`` to a mol block string. @@ -1438,9 +1513,10 @@ def ToMolBlock(self, """ return Chem.MolToMolBlock(self._mol, confId=confId) - def ToAtoms(self, - confId: int = 0, - ) -> Atoms: + def ToAtoms( + self, + confId: int = 0, + ) -> Atoms: """ Convert ``RDKitMol`` to the ``ase.Atoms`` object. @@ -1450,19 +1526,22 @@ def ToAtoms(self, Returns: Atoms: The corresponding ``ase.Atoms`` object. """ - atoms = Atoms(positions=self.GetPositions(id=confId), - numbers=self.GetAtomicNumbers()) + atoms = Atoms( + positions=self.GetPositions(id=confId), + numbers=self.GetAtomicNumbers() + ) atoms.set_initial_magnetic_moments( - [atom.GetNumRadicalElectrons() + 1 - for atom in self.GetAtoms()]) + [atom.GetNumRadicalElectrons() + 1 for atom in self.GetAtoms()] + ) atoms.set_initial_charges( - [atom.GetFormalCharge() - for atom in self.GetAtoms()]) + [atom.GetFormalCharge() for atom in self.GetAtoms()] + ) return atoms - def ToGraph(self, - keep_bond_order: bool = False, - ) -> nx.Graph: + def ToGraph( + self, + keep_bond_order: bool = False, + ) -> nx.Graph: """ Convert RDKitMol to a networkx graph. @@ -1475,15 +1554,15 @@ def ToGraph(self, """ nx_graph = nx.Graph() for atom in self.GetAtoms(): - nx_graph.add_node(atom.GetIdx(), - symbol=atom.GetSymbol(), - atomic_num=atom.GetAtomicNum()) + nx_graph.add_node( + atom.GetIdx(), symbol=atom.GetSymbol(), atomic_num=atom.GetAtomicNum() + ) for bond in self.GetBonds(): bond_type = 1 if not keep_bond_order else bond.GetBondTypeAsDouble() - nx_graph.add_edge(bond.GetBeginAtomIdx(), - bond.GetEndAtomIdx(), - bond_type=bond_type) + nx_graph.add_edge( + bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), bond_type=bond_type + ) return nx_graph @@ -1496,9 +1575,10 @@ def GetFormalCharge(self) -> int: """ return Chem.GetFormalCharge(self._mol) - def GetInternalCoordinates(self, - nonredundant: bool = True, - ) -> list: + def GetInternalCoordinates( + self, + nonredundant: bool = True, + ) -> list: """ Get internal coordinates of the molecule. @@ -1508,11 +1588,13 @@ def GetInternalCoordinates(self, Returns: list: A list of internal coordinates. """ - bonds, angles, torsions = get_internal_coords(self.ToOBMol(), - nonredundant=nonredundant) - return [[[element - 1 for element in item] - for item in ic] - for ic in [bonds, angles, torsions]] + bonds, angles, torsions = get_internal_coords( + self.ToOBMol(), nonredundant=nonredundant + ) + return [ + [[element - 1 for element in item] for item in ic] + for ic in [bonds, angles, torsions] + ] def GetSpinMultiplicity(self) -> int: """ @@ -1527,10 +1609,11 @@ def GetSpinMultiplicity(self) -> int: num_radical_elec += atom.GetNumRadicalElectrons() return num_radical_elec + 1 - def GetTorsionTops(self, - torsion: Iterable, - allowNonbondPivots: bool = False, - ) -> tuple: + def GetTorsionTops( + self, + torsion: Iterable, + allowNonbondPivots: bool = False, + ) -> tuple: """ Generate tops for the given torsion. Top atoms are defined as atoms on one side of the torsion. The mol should be in one-piece when using this function, otherwise, the results will be @@ -1553,7 +1636,9 @@ def GetTorsionTops(self, # Get the shortest path between pivot atoms # There should be only one path connecting pivot atoms # Otherwise, they may not be torsions - connecting_atoms = list(Chem.rdmolops.GetShortestPath(self._mol, pivot[0], pivot[1])) + connecting_atoms = list( + Chem.rdmolops.GetShortestPath(self._mol, pivot[0], pivot[1]) + ) [connecting_atoms.remove(i) for i in pivot] bond_idx = [] # Mark bonds to be cut @@ -1564,21 +1649,23 @@ def GetTorsionTops(self, bond_idx.append(self.GetBondBetweenAtoms(i, n).GetIdx()) break else: - raise ValueError(f'Atom {pivot[0]} and {pivot[1]} are not bonded.') + raise ValueError(f"Atom {pivot[0]} and {pivot[1]} are not bonded.") # Cut bonds connecting pivots split_mol = Chem.rdmolops.FragmentOnBonds(self._mol, bond_idx, addDummies=False) # Generate the indexes for each fragment from the cutting - frags = Chem.rdmolops.GetMolFrags(split_mol, asMols=False, sanitizeFrags=False,) + frags = Chem.rdmolops.GetMolFrags( + split_mol, + asMols=False, + sanitizeFrags=False, + ) if len(frags) == 2: return frags # only remain the fragment that containing pivot atoms return tuple(frag for i in pivot for frag in frags if i in frag) - def SaturateBiradicalSites12(self, - multiplicity: int, - verbose: bool = True): + def SaturateBiradicalSites12(self, multiplicity: int, verbose: bool = True): """ A method help to saturate 1,2 biradicals to match the given molecule spin multiplicity. E.g.:: @@ -1599,13 +1686,17 @@ def SaturateBiradicalSites12(self, return elif cur_multiplicity < multiplicity: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating 1,2 biradical sites.') + print( + "It is not possible to match the multiplicity " + "by saturating 1,2 biradical sites." + ) return elif (cur_multiplicity - multiplicity) % 2: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating 1,2 biradical sites.') + print( + "It is not possible to match the multiplicity " + "by saturating 1,2 biradical sites." + ) return num_dbs = (cur_multiplicity - multiplicity) / 2 @@ -1617,12 +1708,14 @@ def SaturateBiradicalSites12(self, rad_atoms.append(atom.GetIdx()) # a list to record connected atom pairs to avoid repeating - connected = [(i, j) for i, j in combinations(rad_atoms, 2) - if self.GetBondBetweenAtoms(i, j)] + connected = [ + (i, j) + for i, j in combinations(rad_atoms, 2) + if self.GetBondBetweenAtoms(i, j) + ] # Naive way to saturate 1,2 biradicals while num_dbs and connected: - for i, j in connected: bond = self.GetBondBetweenAtoms(i, j) @@ -1648,20 +1741,22 @@ def SaturateBiradicalSites12(self, # TODO: Change this to a log in the future break num_dbs -= 1 - connected = [pair for pair in connected - if pair[0] in rad_atoms and pair[1] in rad_atoms] + connected = [ + pair + for pair in connected + if pair[0] in rad_atoms and pair[1] in rad_atoms + ] # Update things including explicity / implicit valence, etc. self.UpdatePropertyCache(strict=False) if num_dbs: if verbose: - print('Cannot match the multiplicity by saturating 1,2 biradical.') + print("Cannot match the multiplicity by saturating 1,2 biradical.") - def SaturateBiradicalSitesCDB(self, - multiplicity: int, - chain_length: int = 8, - verbose: bool = True): + def SaturateBiradicalSitesCDB( + self, multiplicity: int, chain_length: int = 8, verbose: bool = True + ): """ A method help to saturate biradicals that have conjugated double bond in between to match the given molecule spin multiplicity. E.g, 1,4 biradicals can be saturated @@ -1686,13 +1781,17 @@ def SaturateBiradicalSitesCDB(self, return elif cur_multiplicity < multiplicity: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating conjugated biradical sites.') + print( + "It is not possible to match the multiplicity " + "by saturating conjugated biradical sites." + ) return elif (cur_multiplicity - multiplicity) % 2: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating conjugatged biradical sites.') + print( + "It is not possible to match the multiplicity " + "by saturating conjugatged biradical sites." + ) return num_dbs = (cur_multiplicity - multiplicity) / 2 @@ -1708,15 +1807,18 @@ def SaturateBiradicalSitesCDB(self, # Find all paths satisfy *C -[- C = C -]n- C* # n = 1, 2, 3 is corresponding to chain length = 4, 6, 8 for path_length in range(4, chain_length + 1, 2): - if not num_dbs: # problem solved in the previous run break - all_paths = [list(p) for p in - list(Chem.rdmolops.FindAllPathsOfLengthN(self._mol, - path_length, - useBonds=False))] + all_paths = [ + list(p) + for p in list( + Chem.rdmolops.FindAllPathsOfLengthN( + self._mol, path_length, useBonds=False + ) + ) + ] if not all_paths: # empty all_paths means cannot find chains such long break @@ -1725,8 +1827,7 @@ def SaturateBiradicalSitesCDB(self, for path in all_paths: if path[0] in rad_atoms and path[-1] in rad_atoms: for sec in range(1, path_length - 2, 2): - bond = self.GetBondBetweenAtoms(path[sec], - path[sec + 1]) + bond = self.GetBondBetweenAtoms(path[sec], path[sec + 1]) if bond.GetBondTypeAsDouble() not in [2, 3]: break else: @@ -1734,11 +1835,15 @@ def SaturateBiradicalSitesCDB(self, while num_dbs and paths: for path in paths: - bonds = [self.GetBondBetweenAtoms(*path[i:i + 2]) - for i in range(path_length - 1)] + bonds = [ + self.GetBondBetweenAtoms(*path[i: i + 2]) + for i in range(path_length - 1) + ] - new_bond_types = [ORDERS.get(bond.GetBondTypeAsDouble() + (-1) ** i) - for i, bond in enumerate(bonds)] + new_bond_types = [ + ORDERS.get(bond.GetBondTypeAsDouble() + (-1) ** i) + for i, bond in enumerate(bonds) + ] if any([bond_type is None for bond_type in new_bond_types]): # Although a match is found, cannot decide what bond to make, pass @@ -1748,7 +1853,10 @@ def SaturateBiradicalSitesCDB(self, bond.SetBondType(bond_type) # Modify radical site properties - for atom in [self.GetAtomWithIdx(path[0]), self.GetAtomWithIdx(path[-1])]: + for atom in [ + self.GetAtomWithIdx(path[0]), + self.GetAtomWithIdx(path[-1]), + ]: # Decrease radical electron by 1 atom.SetNumRadicalElectrons(atom.GetNumRadicalElectrons() - 1) # remove it from the list if it is no longer a radical @@ -1761,19 +1869,22 @@ def SaturateBiradicalSitesCDB(self, # if no update is made at all, break # TODO: Change this to a log in the future break - paths = [path for path in paths - if path[0] in rad_atoms and path[-1] in rad_atoms] + paths = [ + path + for path in paths + if path[0] in rad_atoms and path[-1] in rad_atoms + ] - # Update things including explicity / implicit valence, etc. + # Update things including explicity / implicit valence, etc. self.UpdatePropertyCache(strict=False) if num_dbs: if verbose: - print('Cannot match the multiplicity by saturating biradical with conjugated double bonds.') + print( + "Cannot match the multiplicity by saturating biradical with conjugated double bonds." + ) - def SaturateCarbene(self, - multiplicity: int, - verbose: bool = True): + def SaturateCarbene(self, multiplicity: int, verbose: bool = True): """ A method help to saturate carbenes and nitrenes to match the given molecule spin multiplicity:: @@ -1794,20 +1905,26 @@ def SaturateCarbene(self, return elif cur_multiplicity < multiplicity: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating carbene sites.') + print( + "It is not possible to match the multiplicity " + "by saturating carbene sites." + ) return elif (cur_multiplicity - multiplicity) % 2: if verbose: - print('It is not possible to match the multiplicity ' - 'by saturating carbene sites.') + print( + "It is not possible to match the multiplicity " + "by saturating carbene sites." + ) return elec_to_pair = cur_multiplicity - multiplicity - carbene_atoms = self.GetSubstructMatches(CARBENE_PATTERN) # Import from rdmc.utils + carbene_atoms = self.GetSubstructMatches( + CARBENE_PATTERN + ) # Import from rdmc.utils if not carbene_atoms: if verbose: - print('There is no carbene site in the molecule.') + print("There is no carbene site in the molecule.") return for atom in carbene_atoms: @@ -1820,12 +1937,13 @@ def SaturateCarbene(self, if elec_to_pair: if verbose: - print('Cannot match the multiplicity by saturating carbene(-like) atoms') + print( + "Cannot match the multiplicity by saturating carbene(-like) atoms" + ) - def SaturateMol(self, - multiplicity: int, - chain_length: int = 8, - verbose: bool = False): + def SaturateMol( + self, multiplicity: int, chain_length: int = 8, verbose: bool = False + ): """ A method help to saturate the molecule to match the given molecule spin multiplicity. This is just a wrapper to call @@ -1848,20 +1966,22 @@ def SaturateMol(self, verbose (bool): Whether to print intermediate information. Defaults to ``False``. """ - self.SaturateBiradicalSites12(multiplicity=multiplicity, - verbose=verbose) - self.SaturateBiradicalSitesCDB(multiplicity=multiplicity, - chain_length=chain_length, - verbose=verbose) + self.SaturateBiradicalSites12(multiplicity=multiplicity, verbose=verbose) + self.SaturateBiradicalSitesCDB( + multiplicity=multiplicity, chain_length=chain_length, verbose=verbose + ) self.SaturateCarbene(multiplicity=multiplicity, verbose=verbose) if verbose and self.GetSpinMultiplicity() != multiplicity: # TODO: make print a log - print('SaturateMol fails after trying all methods and you need to be cautious about the generated mol.') + print( + "SaturateMol fails after trying all methods and you need to be cautious about the generated mol." + ) - def GetClosedShellMol(self, - cheap: bool = False, - sanitize: bool = True, - ) -> 'RDKitMol': + def GetClosedShellMol( + self, + cheap: bool = False, + sanitize: bool = True, + ) -> "RDKitMol": """ Get a closed shell molecule by removing all radical electrons and adding H atoms to these radical sites. This method currently only work for radicals @@ -1889,9 +2009,7 @@ def GetClosedShellMol(self, mol_copy.Sanitize() return mol_copy - def SetVdwMatrix(self, - threshold: float = 0.4, - vdw_radii: dict = VDW_RADII): + def SetVdwMatrix(self, threshold: float = 0.4, vdw_radii: dict = VDW_RADII): """ Set the derived Van der Waals matrix, which is an upper triangle matrix calculated from a threshold usually around ``0.4`` of the Van der Waals Radii. @@ -1913,9 +2031,7 @@ def SetVdwMatrix(self, self._vdw_mat = generate_vdw_mat(self, threshold, vdw_radii) -def parse_xyz_or_smiles_list(mol_list, - with_3d_info: bool = False, - **kwargs): +def parse_xyz_or_smiles_list(mol_list, with_3d_info: bool = False, **kwargs): """ A helper function to parse xyz and smiles and list if the conformational information is provided. @@ -1939,7 +2055,7 @@ def parse_xyz_or_smiles_list(mol_list, try: rd_mol = RDKitMol.FromXYZ(mol, **kwargs) except ValueError: - rd_mol = RDKitMol.FromSmiles(mol,) + rd_mol = RDKitMol.FromSmiles(mol) rd_mol.EmbedConformer() is_3D.append(False) else: @@ -1954,9 +2070,7 @@ def parse_xyz_or_smiles_list(mol_list, return mols -def generate_vdw_mat(rd_mol, - threshold: float = 0.4, - vdw_radii: dict = VDW_RADII): +def generate_vdw_mat(rd_mol, threshold: float = 0.4, vdw_radii: dict = VDW_RADII): """ Generate a derived Van der Waals matrix, which is an upper triangle matrix calculated from a threshold usually around 0.4 of the Van der Waals Radii. @@ -1990,10 +2104,10 @@ def generate_vdw_mat(rd_mol, for atom2_ind in atom_idx_list[atom1_ind + 1:]: if atom2_ind in bonded_atom_number: # set a small value for bonded atoms, so they won't arise the collision detector - vdw_mat[atom1_ind, atom2_ind] = 0. + vdw_mat[atom1_ind, atom2_ind] = 0.0 else: atom2 = rd_mol.GetAtomWithIdx(atom2_ind) - vdw_mat[atom1_ind, atom2_ind] = threshold * \ - (vdw_radii[atom1.GetAtomicNum()] - + vdw_radii[atom2.GetAtomicNum()]) + vdw_mat[atom1_ind, atom2_ind] = threshold \ + * (vdw_radii[atom1.GetAtomicNum()] + + vdw_radii[atom2.GetAtomicNum()]) return vdw_mat From d15649f357076cdc33ca7651679186e44bc3b8f7 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 15:52:55 -0400 Subject: [PATCH 10/33] Add unit tests for HasSameConnectivity and GetClosedShellMol --- test/test_mol.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/test_mol.py b/test/test_mol.py index 88d836f1..19b87d22 100644 --- a/test/test_mol.py +++ b/test/test_mol.py @@ -825,6 +825,33 @@ def test_get_finger_print(self): .GetCountFingerprintAsNumPy(Chem.MolFromSmiles(smi)) assert np.isclose(fp, fp_expect).all() + @pytest.mark.parametrize( + 'rad_smi, expect_smi', + [ + ('[CH3]', 'C'), + ('c1[c]cccc1', 'c1ccccc1'), + ('C[NH2]', 'CN'), + ('[CH2]C[CH2]', 'CCC') + ]) + @pytest.mark.parametrize('cheap', [(True,), (False,)]) + def test_get_closed_shell_mol(self, rad_smi, expect_smi, cheap): + + rad_mol = RDKitMol.FromSmiles(rad_smi) + assert rad_mol.GetClosedShellMol(cheap=cheap).ToSmiles() == expect_smi + + @pytest.mark.parametrize( + 'smi1, smi2, expect_match', + [ + ('[CH3]', 'C', False), + ('[O:1]([H:2])[H:3]', '[O:1]([H:2])[H:3]', True), + ('[O:1]([H:2])[H:3]', '[O:2]([H:1])[H:3]', False), + ('[H:1][C:2](=[O:3])[O:4]', '[H:1][C:2]([O:3])=[O:4]', True) + ]) + def test_has_same_connectivity(self, smi1, smi2, expect_match): + mo1 = RDKitMol.FromSmiles(smi1) + mol2 = RDKitMol.FromSmiles(smi2) + assert mo1.HasSameConnectivity(mol2) == expect_match + def test_parse_xyz_or_smiles_list(): """ From 7375717f1688d43a7a43bd80117d6d90ddecdafd Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 16:08:06 -0400 Subject: [PATCH 11/33] add document rst for fix and mol_compare modules --- docs/source/reference/fix.rst | 7 +++++++ docs/source/reference/mol_compare.rst | 7 +++++++ docs/source/reference/rdmc.rst | 8 +++++--- 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 docs/source/reference/fix.rst create mode 100644 docs/source/reference/mol_compare.rst diff --git a/docs/source/reference/fix.rst b/docs/source/reference/fix.rst new file mode 100644 index 00000000..94276023 --- /dev/null +++ b/docs/source/reference/fix.rst @@ -0,0 +1,7 @@ +rdmc.fix +======== + +.. automodule:: rdmc.fix + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/mol_compare.rst b/docs/source/reference/mol_compare.rst new file mode 100644 index 00000000..3bcae2f8 --- /dev/null +++ b/docs/source/reference/mol_compare.rst @@ -0,0 +1,7 @@ +rdmc.mol_compare +================ + +.. automodule:: rdmc.mol_compare + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/reference/rdmc.rst b/docs/source/reference/rdmc.rst index a135176c..40f45877 100644 --- a/docs/source/reference/rdmc.rst +++ b/docs/source/reference/rdmc.rst @@ -18,10 +18,12 @@ RDMC APIs .. toctree:: :maxdepth: 4 - conf - forcefield mol + mol_compare + fix reaction ts - utils view + conf + forcefield + utils From 32bef0680ac3616c16b871a365df572e0cb75647 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Fri, 27 Oct 2023 22:02:26 -0400 Subject: [PATCH 12/33] Temporary: skip charge separation resonance structures --- rdmc/resonance/resonance.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rdmc/resonance/resonance.py b/rdmc/resonance/resonance.py index e8c0b2b7..53a233ca 100644 --- a/rdmc/resonance/resonance.py +++ b/rdmc/resonance/resonance.py @@ -76,6 +76,7 @@ def generate_radical_resonance_structures( # Post-processing resonance structures cleaned_mols = [] for res_mol in res_mols: + discard_flag = False for atom in res_mol.GetAtoms(): # Convert positively charged species back to radical species charge = atom.GetFormalCharge() @@ -83,10 +84,12 @@ def generate_radical_resonance_structures( recipe[atom.GetIdx()] = radical_electrons atom.SetFormalCharge(0) atom.SetNumRadicalElectrons(charge) - elif charge < 0: # Shouldn't appear, just for bug detection - raise RuntimeError( - "Encounter charge separation during resonance structure generation." - ) + elif charge < 0: + # Known case: O=CC=C -> [O-]C=C[C+] + # Discard such resonance structures + discard_flag = True + if discard_flag: + continue # If a structure cannot be sanitized, removed it try: From c786d97135aacac78293c4b5e7625d7c90df0604 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Tue, 31 Oct 2023 17:31:29 -0400 Subject: [PATCH 13/33] Enable RenumberAtoms taking dict as input This provides extra benefits in terms of implementation. E.g., one only wants to swap the index of two atoms can now do mol.RenumberAtoms({0:2, 2:0}) instead of provide the full list. --- rdmc/mol.py | 25 +++++++++++++++++++------ test/test_mol.py | 7 +++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index f65a6be4..f47f3294 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1260,18 +1260,25 @@ def RemoveHs(self, sanitize: bool = True): def RenumberAtoms( self, - newOrder: Optional[list] = None, + newOrder: Optional[Union[dict, list]] = None, updateAtomMap: bool = True, ) -> "RDKitMol": """ Return a new copy of RDKitMol that has atom (index) reordered. Args: - newOrder (list, optional): the new ordering the atoms (should be numAtoms long). E.g, - if newOrder is ``[3,2,0,1]``, then atom ``3`` in the original molecule - will be atom ``0`` in the new one. If no value provided, then the molecule - will be renumbered based on the current atom map numbers. The latter is helpful - when the sequence of atom map numbers and atom indexes are inconsistent. + newOrder (list or dict, optional): The new ordering the atoms (should be numAtoms long). + - If provided as a list, it should a list of atom indexes. + E.g., if newOrder is ``[3,2,0,1]``, then atom ``3`` + in the original molecule will be atom ``0`` in the new one. + - If provided as a dict, it should be a mapping between atoms. E.g., + if newOrder is ``{0: 3, 1: 2, 2: 0, 3: 1}``, then atom ``0`` in the + original molecule will be atom ``3`` in the new one. Unlike the list case, + the newOrder can be a partial mapping, but one should make sure all the pairs + are included. E.g., ``{0: 3, 3: 0}``. + - If no value provided (default), then the molecule + will be renumbered based on the current atom map numbers. The latter is helpful + when the sequence of atom map numbers and atom indexes are inconsistent. updateAtomMap (bool): Whether to update the atom map number based on the new order. Defaults to ``True``. @@ -1281,6 +1288,12 @@ def RenumberAtoms( if newOrder is None: newOrder = reverse_map(self.GetAtomMapNumbers()) try: + if isinstance(newOrder, dict): + newOrder_ = list(range(self.GetNumAtoms())) + for pair in newOrder.items(): + newOrder_[pair[1]] = pair[0] + newOrder = newOrder_ + rwmol = Chem.rdmolops.RenumberAtoms(self._mol, newOrder) except RuntimeError: raise ValueError( diff --git a/test/test_mol.py b/test/test_mol.py index 19b87d22..a97f1c3c 100644 --- a/test/test_mol.py +++ b/test/test_mol.py @@ -573,6 +573,13 @@ def test_renumber_atoms(self): assert renumbered.GetAtomMapNumbers() == (1, 2, 3, 4, 5) assert renumbered.GetAtomicNumbers() == [1, 1, 1, 1, 6] + '[C:1]([H:2])([H:3])([H:4])[H:5]' + ref_mol = RDKitMol.FromSmiles(smi) + # Renumber molecule with a dict + renumbered = ref_mol.RenumberAtoms({0: 4, 4: 0}, updateAtomMap=True) + assert renumbered.GetAtomMapNumbers() == (1, 2, 3, 4, 5) + assert renumbered.GetAtomicNumbers() == [1, 1, 1, 1, 6] + def test_copy(self): """ Test copy molecule functionality. From 4a555a6a8e2dda601da9cb1091373f734a0e20dc Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 10:10:40 -0400 Subject: [PATCH 14/33] Add a more insightful error raise in FromSmiles The current Chem.MolFromSmiles will silently generate a None object if the smiles is not valid (e.g., handwritten), and the None object will raise Attribute Error in the following steps. It is more insightful to raise a valueError instead to indicate the SMILES is not valid. --- rdmc/mol.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index f47f3294..b43a482b 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -509,8 +509,12 @@ def FromSmiles( # (e.g., [C+:1]#[C:2][C:3]1=[C:7]([H:10])[N-:6][O:5][C:4]1([H:8])[H:9]), # no hydrogens are automatically added. So, we need to add H atoms. if not removeHs and addHs: - mol.UpdatePropertyCache(strict=False) - mol = Chem.rdmolops.AddHs(mol) + try: + mol.UpdatePropertyCache(strict=False) + mol = Chem.rdmolops.AddHs(mol) + except AttributeError: + # Caused by mol being None + raise ValueError("The provided SMILES is not valid. Please double check.") # Create RDKitMol mol = cls(mol, keepAtomMap=keepAtomMap) From de9024b3f3665d59f0166489abc3f56ef59ecf81 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 10:22:20 -0400 Subject: [PATCH 15/33] Create a new method for get substructure match and recipe This method uses the default substructure match and create a recipe. The recipe is used to transform the provided mol to the current mol --- rdmc/mol.py | 18 +++++++++++++++ rdmc/utils.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ test/test_mol.py | 23 +++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/rdmc/mol.py b/rdmc/mol.py index b43a482b..a6452a63 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1061,6 +1061,24 @@ def GetSubstructMatches( query, uniquify, useChirality, useQueryQueryMatches, maxMatches ) + def GetSubStructMatchAndRecipe( + self, + mol: 'RDKitMol' + ) -> Tuple[tuple, dict]: + """ + Get the substructure match between two molecules and the recipe to recover + the pattern `mol`. If swapping the atom indices in mol according to the recipe, + mol2 should be the same as mol1. + + Args: + mol (RDKitMol): The molecule to compare with. + + Returns: + tuple: The substructure match. + dict: A truncated atom mapping of mol2 to mol1. + """ + return get_substruct_match_and_recover_recipe(self._mol, mol._mol) + def GetMolFrags( self, asMols: bool = False, diff --git a/rdmc/utils.py b/rdmc/utils.py index 1ebabcac..1eab105e 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -738,6 +738,66 @@ def get_closed_shell_by_add_hs( return mol +def get_substruct_match_and_recover_recipe( + mol1: "RWMol", + mol2: "RWMol", +) -> tuple[tuple, dict]: + """ + Get the substructure match between two molecules and the recipe to recover + mol2 to mol1. If swapping the atom indices in mol2 according to the recipe, + mol2 should be the same as mol1. + + Args: + mol1 (RWMol): The first molecule. + mol2 (RWMol): The second molecule. + + Returns: + tuple: The substructure match. + dict: A truncated atom mapping of mol2 to mol1. + """ + match = mol1.GetSubstructMatch(mol2) + recipe = {i: j for i, j in enumerate(match) if i != j} + + if len(recipe) == 0: + # Either mol1 and mol2 has identical graph or no match at all + return match, recipe + + # The default GetSubstructMatch may not always return the simplest mapping + # The following implements a naive algorithm fixing the issue caused by equivalent + # hydrogens. The idea is that if two hydrogens are equivalent, they are able to + # be mapped to the same atom in mol1. + + # Find equivalent hydrogens + hs = [i for i in recipe.keys() if mol1.GetAtomWithIdx(i).GetAtomicNum() == 1] + equivalent_hs = [] + checked_hs = set() + + for i in range(len(hs)): + if i in checked_hs: + continue + equivalent_hs.append([hs[i]]) + checked_hs.add(i) + for j in range(i + 1, len(hs)): + if j in checked_hs: + continue + path = Chem.rdmolops.GetShortestPath(mol2, hs[i], hs[j]) + if len(path) == 3: # H1-X2-H3 + equivalent_hs[-1].append(hs[j]) + checked_hs.add(j) + + # Clean up the recipe based on the equivalent hydrogens + # E.g. {2: 13, 12: 2, 13: 12} -> {2: 13, 12: 13} + for group in equivalent_hs: + for i in group: + j = recipe.get(i) + if j is not None and j in group: + recipe[i] = recipe[j] + match[i], match[j] = match[j], j + del recipe[j] + + return match, recipe + + # CPK (Corey-Pauling-Koltun) color scheme, Generated using ChatGPT CPK_COLOR_PALETTE = { "H": (1.00, 1.00, 1.00), diff --git a/test/test_mol.py b/test/test_mol.py index a97f1c3c..f264e67d 100644 --- a/test/test_mol.py +++ b/test/test_mol.py @@ -531,6 +531,29 @@ def test_combined_mol(self): assert 200 == m1.CombineMol(m2, c_product=True).GetNumConformers() assert 200 == m2.CombineMol(m1, c_product=True).GetNumConformers() + def test_get_substruct_match_and_recipe(self): + """ + Test GetSubstructMatchAndRecipe + """ + smi1 = '[C:1]([H:2])([H:3])([H:4])[H:5]' + smi2 = '[H:1][C:2]([H:3])([H:4])[H:5]' + + mol1 = RDKitMol.FromSmiles(smi1) + mol2 = RDKitMol.FromSmiles(smi2) + + match, recipe = mol1.GetSubStructMatchAndRecipe(mol2) + assert match == (1, 0, 2, 3, 4) + assert recipe == {0: 1, 1: 0} + + smi1 = '[C:1]([H:2])([H:3])[H:4].[H:5]' + smi2 = '[H:1][C:2]([H:3])[H:5].[H:4]' + mol1 = RDKitMol.FromSmiles(smi1) + mol2 = RDKitMol.FromSmiles(smi2) + + match, recipe = mol1.GetSubStructMatchAndRecipe(mol2) + assert match == (1, 0, 2, 4, 3) + assert recipe == {0: 1, 1: 0, 3: 4, 4: 3} + def test_renumber_atoms(self): """ Test the functionality of renumber atoms of a molecule. From 170a5466a00e9121c4dafcc3ef831781741eeb89 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 11:33:27 -0400 Subject: [PATCH 16/33] Correct the naming of GetSubstructMatchAndRecipe Previously use "GetSubStruct..." which is different from the convention of the existing ones.. Change it to "GetSubstruct..." --- rdmc/mol.py | 9 +++++---- rdmc/utils.py | 3 ++- test/test_mol.py | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rdmc/mol.py b/rdmc/mol.py index a6452a63..5549cb4d 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -1061,14 +1061,15 @@ def GetSubstructMatches( query, uniquify, useChirality, useQueryQueryMatches, maxMatches ) - def GetSubStructMatchAndRecipe( + def GetSubstructMatchAndRecipe( self, mol: 'RDKitMol' ) -> Tuple[tuple, dict]: """ - Get the substructure match between two molecules and the recipe to recover - the pattern `mol`. If swapping the atom indices in mol according to the recipe, - mol2 should be the same as mol1. + Get the substructure match between two molecules and a recipe to recover + the provide `mol` to the current mol. If swapping the atom indices in `mol` according to the recipe, + the `mol` should have the same connectivity as the current molecule. Note, if no match is found, + the returned match and recipe will be empty. Args: mol (RDKitMol): The molecule to compare with. diff --git a/rdmc/utils.py b/rdmc/utils.py index 1eab105e..a03b3554 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -787,6 +787,7 @@ def get_substruct_match_and_recover_recipe( # Clean up the recipe based on the equivalent hydrogens # E.g. {2: 13, 12: 2, 13: 12} -> {2: 13, 12: 13} + match = list(match) for group in equivalent_hs: for i in group: j = recipe.get(i) @@ -795,7 +796,7 @@ def get_substruct_match_and_recover_recipe( match[i], match[j] = match[j], j del recipe[j] - return match, recipe + return tuple(match), recipe # CPK (Corey-Pauling-Koltun) color scheme, Generated using ChatGPT diff --git a/test/test_mol.py b/test/test_mol.py index f264e67d..764d5d39 100644 --- a/test/test_mol.py +++ b/test/test_mol.py @@ -541,7 +541,7 @@ def test_get_substruct_match_and_recipe(self): mol1 = RDKitMol.FromSmiles(smi1) mol2 = RDKitMol.FromSmiles(smi2) - match, recipe = mol1.GetSubStructMatchAndRecipe(mol2) + match, recipe = mol1.GetSubstructMatchAndRecipe(mol2) assert match == (1, 0, 2, 3, 4) assert recipe == {0: 1, 1: 0} @@ -550,7 +550,7 @@ def test_get_substruct_match_and_recipe(self): mol1 = RDKitMol.FromSmiles(smi1) mol2 = RDKitMol.FromSmiles(smi2) - match, recipe = mol1.GetSubStructMatchAndRecipe(mol2) + match, recipe = mol1.GetSubstructMatchAndRecipe(mol2) assert match == (1, 0, 2, 4, 3) assert recipe == {0: 1, 1: 0, 3: 4, 4: 3} From 0d0f4f9e253f2247ea72eb1252cfadc8c981e74a Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 11:34:32 -0400 Subject: [PATCH 17/33] Add method to check equivalent reactions --- rdmc/mol_compare.py | 59 +++++++++++++++++++++++++++++++++++++++++++ rdmc/reaction.py | 25 +++++++++++++++++- test/test_reaction.py | 44 +++++++++++++++++++++++++++++++- 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/rdmc/mol_compare.py b/rdmc/mol_compare.py index eea940b4..90b43210 100644 --- a/rdmc/mol_compare.py +++ b/rdmc/mol_compare.py @@ -167,3 +167,62 @@ def is_same_complex( else: return False return True + + +def is_equivalent_reaction( + rxn1: "Reaction", + rxn2: "Reaction", +) -> bool: + """ + If two reactions has equivalent transformation regardless of atom mapping. This can be useful when filtering + out duplicate reactions that are generated from different atom mapping. + + Args: + rxn1 (Reaction): The first reaction. + rxn2 (Reaction): The second reaction. + + Returns: + bool: Whether the two reactions are equivalent. + """ + match_r, recipe_r = rxn1.reactant_complex.GetSubstructMatchAndRecipe( + rxn2.reactant_complex + ) + match_p, recipe_p = rxn1.product_complex.GetSubstructMatchAndRecipe( + rxn2.product_complex + ) + + if not match_r or not match_p: + # Reactant and product not matched + return False + + elif recipe_r == recipe_p: + # Both can match and can be recovered with the same recipe, + # meaning we can simply recover the other reaction by swapping the index of these reactions + # Note, this also includes the case recipes are empty, meaning things are perfectly matched + # and no recovery operation is needed. + return True + + elif not recipe_r: + # The reactants are perfectly matched + # Then we need to see if r/p match after the r/p are renumbered based on the product recipe, + new_rxn2_rcomplex = rxn2.reactant_complex.RenumberAtoms(recipe_p) + new_rxn2_pcomplex = rxn2.product_complex.RenumberAtoms(recipe_p) + + elif not recipe_p: + # The products are perfectly matched + # Then we need to see if r/p match after the r/p are renumbered based on the reactant recipe, + new_rxn2_rcomplex = rxn2.reactant_complex.RenumberAtoms(recipe_r) + new_rxn2_pcomplex = rxn2.product_complex.RenumberAtoms(recipe_r) + + else: + # TODO: this condition hasn't been fully investigated and tested yet. + # TODO: Usually this will results in a non-equivalent reaction. + # TODO: for now, we just renumber based on recipe_r + new_rxn2_rcomplex = rxn2.reactant_complex.RenumberAtoms(recipe_r) + new_rxn2_pcomplex = rxn2.product_complex.RenumberAtoms(recipe_r) + + # To check if not recipe is the same + _, recipe_r = rxn1.reactant_complex.GetSubstructMatchAndRecipe(new_rxn2_rcomplex) + _, recipe_p = rxn1.product_complex.GetSubstructMatchAndRecipe(new_rxn2_pcomplex) + + return recipe_r == recipe_p diff --git a/rdmc/reaction.py b/rdmc/reaction.py index 4df357a0..7d734ba5 100644 --- a/rdmc/reaction.py +++ b/rdmc/reaction.py @@ -14,7 +14,7 @@ from rdkit.Chem.Draw import rdMolDraw2D from rdmc import RDKitMol -from rdmc.mol_compare import is_same_complex +from rdmc.mol_compare import is_same_complex, is_equivalent_reaction from rdmc.resonance import generate_radical_resonance_structures from rdmc.ts import get_all_changing_bonds @@ -542,3 +542,26 @@ def is_same_products( bool: Whether the reaction has the same products as the given products or product complex. """ return is_same_complex(self.product_complex, products, resonance=resonance) + + def is_equivalent( + self, + reaction: "Reaction", + both_directions: bool = False, + ) -> bool: + """ + Check if the reaction is equivalent to the given reaction. + + Args: + reaction (Reaction): The reaction to compare. + both_directions (bool, optional): Whether to check both directions. Defaults to ``False``. + + Returns: + bool: Whether the reaction is equivalent to the given reaction. + """ + equiv = is_equivalent_reaction(self, reaction) + + if both_directions and not equiv: + tmp_reaction = self.get_reverse_reaction() + equiv = is_equivalent_reaction(tmp_reaction, reaction) + + return equiv diff --git a/test/test_reaction.py b/test/test_reaction.py index 1788a649..9d939a59 100644 --- a/test/test_reaction.py +++ b/test/test_reaction.py @@ -14,7 +14,7 @@ logging.basicConfig(level=logging.DEBUG) -class TestReaction(unittest.TestCase): +class TestReaction: """ A class used to test basic operations for the RDKitMol Class. """ @@ -152,6 +152,48 @@ def test_num_broken_bonds(self): # Reaction 1 assert self.rxn1.num_broken_bonds == 1 + @pytest.mark.parametrize( + "rxn_smi1, rxn_smi2, expected_forward, expected_both", + [ + # Case 1: the difference is right after the >> sign + ( + "[O:1]([C:2]([C:3]([C:4]([C:5]([C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:12])" + "[H:13])[H:11])([H:9])[H:10])[H:8]>>[H:18].[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])" + "([H:15])[C:6]([H:16])([H:17])[C:7]1([H:19])[H:20])([H:9])[H:10])[H:8]", + "[O:1]([C:2]([C:3]([C:4]([C:5]([C:6]([C:7]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:12])" + "[H:13])[H:11])([H:9])[H:10])[H:8]>>[H:20].[O:1]([C:2]([C:3]1([H:11])[C:4]([H:12])([H:13])[C:5]([H:14])" + "([H:15])[C:6]([H:16])([H:17])[C:7]1([H:18])[H:19])([H:9])[H:10])[H:8]", + True, + True, + ), + # Case 2: the first is H abstraction, the second is H transfer; pay attention to the indexes of O-C=O group + ( + "[H:3][O:8][C:7]([C:5]([C:4]([H:10])([H:11])[H:12])=[C:6]([H:13])[H:14])=[O:9].[O:1]([O:2])[H:15]>>[C:4]([C:5]" + "(=[C:6]([H:13])[H:14])[C:7](=[O:8])[O:9][H:15])([H:10])([H:11])[H:12].[O:1]([O:2])[H:3]", + "[C:4]([C:5](=[C:6]([H:13])[H:14])[C:7](=[O:8])[O:9][H:15])([H:10])([H:11])[H:12].[O:1]([O:2])[H:3]>>[H:3]" + "[O:9][C:7]([C:5]([C:4]([H:10])([H:11])[H:12])=[C:6]([H:13])[H:14])=[O:8].[O:1][O:2][H:15]", + False, + False, + ), + # Case 3: The reverse of the second reaction is equivalent to the first reaction + ( + "[C:1]([C:2]([H:5])([H:6])[O:7][O:8][H:9])([H:3])([H:4])[O:10]>>[C:1]([C:2]([H:5])([H:6])[O:7][O:8][H:9])([H:3])=[O:10].[H:4]", + "[C:1]([C:2]([H:5])([H:6])[O:7][O:8][H:9])([H:4])=[O:10].[H:3]>>[C:1]([C:2]([H:5])([H:6])[O:7][O:8][H:9])([H:3])([H:4])[O:10]", + False, + True, + ), + ], + ) + def test_is_equivalent(self, rxn_smi1, rxn_smi2, expected_forward, expected_both): + """ + Test the is_equivalent method. + """ + rxn1 = Reaction.from_reaction_smiles(rxn_smi1) + rxn2 = Reaction.from_reaction_smiles(rxn_smi2) + + assert rxn1.is_equivalent(rxn2, both_directions=False) == expected_forward + assert rxn1.is_equivalent(rxn2, both_directions=True) == expected_both + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=3)) From afe13480331951562a9e0f818a26e7bf83671561 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 11:35:26 -0400 Subject: [PATCH 18/33] Update format of fix_reaction.py --- test/test_reaction.py | 86 ++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/test/test_reaction.py b/test/test_reaction.py index 9d939a59..230be597 100644 --- a/test/test_reaction.py +++ b/test/test_reaction.py @@ -41,14 +41,18 @@ def test_initialize_reaction_1_to_1(self): rxn = Reaction(reactant=reactant, product=product) assert len(rxn.reactant) == 1 assert len(rxn.product) == 1 - assert rxn.reactant[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.reactant_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) # Test the initialization of the Reaction class with RDKitMol reactant = self.rmol1 @@ -56,14 +60,18 @@ def test_initialize_reaction_1_to_1(self): rxn = Reaction(reactant=reactant, product=product) assert len(rxn.reactant) == 1 assert len(rxn.product) == 1 - assert rxn.reactant[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.reactant_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) def test_from_reaction_smiles_1_to_1(self): """ @@ -73,30 +81,40 @@ def test_from_reaction_smiles_1_to_1(self): rxn = Reaction.from_reaction_smiles(rxn_smiles) assert len(rxn.reactant) == 1 assert len(rxn.product) == 1 - assert rxn.reactant[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.reactant_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) def test_from_reactant_product_smiles_1_to_1(self): """ Test the initialization of the Reaction class from reactant and product smiles. """ - rxn = Reaction.from_reactant_and_product_smiles(rsmi=self.rsmi1, psmi=self.psmi1) + rxn = Reaction.from_reactant_and_product_smiles( + rsmi=self.rsmi1, psmi=self.psmi1 + ) assert len(rxn.reactant) == 1 assert len(rxn.product) == 1 - assert rxn.reactant[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.reactant_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product[0].ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) - assert rxn.product_complex.ToSmiles(removeAtomMap=False, removeHs=False) \ - == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.reactant_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.rmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product[0].ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) + assert rxn.product_complex.ToSmiles( + removeAtomMap=False, removeHs=False + ) == self.pmol1.ToSmiles(removeAtomMap=False, removeHs=False) def test_is_num_atoms_balanced(self): """ @@ -195,5 +213,5 @@ def test_is_equivalent(self, rxn_smi1, rxn_smi2, expected_forward, expected_both assert rxn1.is_equivalent(rxn2, both_directions=True) == expected_both -if __name__ == '__main__': +if __name__ == "__main__": unittest.main(testRunner=unittest.TextTestRunner(verbosity=3)) From 3e1125805f2ecd40a650aa6c7fb101348031a5cb Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 11:36:24 -0400 Subject: [PATCH 19/33] Add a flag of is_resonance_corrected to avoid re-computing --- rdmc/reaction.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/rdmc/reaction.py b/rdmc/reaction.py index 7d734ba5..43417e58 100644 --- a/rdmc/reaction.py +++ b/rdmc/reaction.py @@ -329,6 +329,13 @@ def involved_atoms(self) -> List[int]: """ return list(set(chain(*self.involved_bonds))) + @property + def is_resonance_corrected(self) -> bool: + """ + Whether the reaction is resonance corrected. + """ + return getattr(self, '_is_resonance_corrected', False) + def apply_resonance_correction( self, inplace: bool = True, @@ -337,6 +344,11 @@ def apply_resonance_correction( """ Apply resonance correction to the reactant and product complexes. """ + if self.is_resonance_corrected: + # Avoid applying resonance correction multiple times + # TODO: add a auto-clean somewhere to update this flag + # TODO: when the reactant and product are changed + return self try: rcps = generate_radical_resonance_structures( self.reactant_complex, kekulize=kekulize @@ -366,10 +378,15 @@ def apply_resonance_correction( if inplace: self.init_reactant_product(rmol, pmol) self.bond_analysis() + self._is_resonance_corrected = True return self else: # todo: check if ts has 3d coordinates - return Reaction(rmol, pmol, ts=self.ts) + new_rxn = Reaction(rmol, pmol, ts=self.ts) + new_rxn._is_resonance_corrected = True + return new_rxn + + self._is_resonance_corrected = True return self def get_reverse_reaction(self): From 0aac96bb16e2727b145e07222fb29248d0c69515 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 11:41:07 -0400 Subject: [PATCH 20/33] Use typing.Tuple so that it is compatible with python 3.8 --- rdmc/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdmc/utils.py b/rdmc/utils.py index a03b3554..c0d684af 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -5,7 +5,7 @@ This module provides methods that can directly apply to RDKit Mol/RWMol. """ -from typing import Iterable, Union +from typing import Iterable, Tuple, Union import numpy as np @@ -741,7 +741,7 @@ def get_closed_shell_by_add_hs( def get_substruct_match_and_recover_recipe( mol1: "RWMol", mol2: "RWMol", -) -> tuple[tuple, dict]: +) -> Tuple[tuple, dict]: """ Get the substructure match between two molecules and the recipe to recover mol2 to mol1. If swapping the atom indices in mol2 according to the recipe, From 2c465326eb102d28e6f5d64cd6ebe8df25776a86 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 13:01:56 -0400 Subject: [PATCH 21/33] Add a quicker check of bonds before using graphs add bond analysis --- rdmc/mol_compare.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rdmc/mol_compare.py b/rdmc/mol_compare.py index 90b43210..b60cef70 100644 --- a/rdmc/mol_compare.py +++ b/rdmc/mol_compare.py @@ -184,6 +184,13 @@ def is_equivalent_reaction( Returns: bool: Whether the two reactions are equivalent. """ + equiv = (rxn1.num_formed_bonds == rxn2.num_formed_bonds) \ + and (rxn1.num_broken_bonds == rxn2.num_broken_bonds) \ + and (rxn1.num_changed_bonds == rxn2.num_changed_bonds) + + if not equiv: + return False + match_r, recipe_r = rxn1.reactant_complex.GetSubstructMatchAndRecipe( rxn2.reactant_complex ) From 209bdf1910efd6403b03cfcb9f03db83500ad414 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 14:27:55 -0400 Subject: [PATCH 22/33] Add new fixes for peroxide bi-radicals --- rdmc/fix.py | 8 ++++++++ test/test_fix.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/rdmc/fix.py b/rdmc/fix.py index 4339547f..3cd9d176 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -54,6 +54,14 @@ "[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1" ">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1" ), + # Remedy 11 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] + rdChemReactions.ReactionFromSmarts( + "[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+1v3X3:1]-[O+0-0v2X2:2]-[O-1v1X1:3]" + ), + # Remedy 12 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] + rdChemReactions.ReactionFromSmarts( + "[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+1v3X3:3]-[O+0-0v2X2:4]-[O-1v1X1:5]" + ), ] diff --git a/test/test_fix.py b/test/test_fix.py index 85f930c4..b733b2a0 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -16,6 +16,8 @@ ("O=O", "[O][O]"), ("[C]=O", "[C-]#[O+]"), ("CS(C)([O])[O]", "CS(C)(=O)=O"), + ("[CH2]O[O]", "[CH2+]O[O-]"), + ("[CH2]C=CO[O]", "C=C[CH+]O[O-]"), ], ) def test_fix_sanitize_ok(smi, exp_smi): From b98112036966b0674abd40905c8e60f899678bc0 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 16:19:09 -0400 Subject: [PATCH 23/33] BugFix: xyz2mol non-robust behavior if resonance generation fails There are cases ResonanceMolSupplier returns None, causing error in downstream. This commit makes sure, if nothing meaningful is created, at least the mol created previously will be returned. --- rdmc/external/xyz2mol.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rdmc/external/xyz2mol.py b/rdmc/external/xyz2mol.py index 654b5a34..4f2a94b6 100644 --- a/rdmc/external/xyz2mol.py +++ b/rdmc/external/xyz2mol.py @@ -528,8 +528,14 @@ def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, return [] # BO2mol returns an arbitrary resonance form. Let's make the rest - mols = rdchem.ResonanceMolSupplier(mol, Chem.UNCONSTRAINED_CATIONS, Chem.UNCONSTRAINED_ANIONS) - mols = [mol for mol in mols] + mols = [mol for mol in rdchem.ResonanceMolSupplier( + mol, + Chem.ALLOW_INCOMPLETE_OCTETS | Chem.UNCONSTRAINED_CATIONS | Chem.UNCONSTRAINED_ANIONS, + ) if mol is not None] + + if not mols: + mols = [mol] # For some cases, resonance structure supplier creates Nones + return mols From 57d03e354b444174733b24b3f3aafa995dda4cab Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 16:22:03 -0400 Subject: [PATCH 24/33] Deprecate fix_CO in the from_xyz workflow and add it to fix_mol --- rdmc/fix.py | 26 +++++++++++++----------- rdmc/mol.py | 8 ++------ rdmc/utils.py | 49 +--------------------------------------------- test/test_fix.py | 1 + test/test_utils.py | 4 ---- 5 files changed, 19 insertions(+), 69 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 3cd9d176..58f10dbc 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -17,48 +17,52 @@ rdChemReactions.ReactionFromSmarts( "[O+0-0v2X1:1]=[C+0-0v2X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]" ), - # Remedy 2 - Oxygen Molecule: O=O to [O]-[O] + # Remedy 2 - Carbon monoxide: [C]=O to [C-]#[O+] + rdChemReactions.ReactionFromSmarts( + "[O+0-0v3X1:1]#[C+0-0v3X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]" + ), + # Remedy 3 - Oxygen Molecule: O=O to [O]-[O] rdChemReactions.ReactionFromSmarts( "[O+0-0v2X1:1]=[O+0-0v2X1:2]>>[O+0-0v1X1:1]-[O+0-0v1X1:2]" ), - # Remedy 3 - isocyanide: R[N]#[C] to R[N+]#[C-] + # Remedy 4 - isocyanide: R[N]#[C] to R[N+]#[C-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X2:1]#[C+0-0v3X1:2]>>[N+v4X2:1]#[C-v3X1:2]" ), - # Remedy 4 - azide: RN=N=[N] to RN=[N+]=[N-] + # Remedy 5 - azide: RN=N=[N] to RN=[N+]=[N-] rdChemReactions.ReactionFromSmarts( "[N+0-0v3X2:1]=[N+0-0v4X2:2]=[N+0-0v2X1:3]>>[N+0-0v3X2:1]=[N+1v4X2:2]=[N-1v2X1:3]" ), - # Remedy 5 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] + # Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]" ), - # Remedy 6 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R + # Remedy 7 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]" ), - # Remedy 7 - amine radical: RN(R)=C to RN(R)-[C] + # Remedy 8 - amine radical: RN(R)=C to RN(R)-[C] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]" ), - # Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O] + # Remedy 9 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O] rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]" ), - # Remedy 9 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O) + # Remedy 10 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O) rdChemReactions.ReactionFromSmarts( "[S+0-0v4X4:1](-[O+0-0v1X1:2])-[O+0-0v1X1:3]>>[S+0-0v6X4:1](=[O+0-0v2X1:2])=[O+0-0v2X1:3]" ), - # Remedy 10 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1 + # Remedy 11 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1 rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1" ">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1" ), - # Remedy 11 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] + # Remedy 12 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] rdChemReactions.ReactionFromSmarts( "[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+1v3X3:1]-[O+0-0v2X2:2]-[O-1v1X1:3]" ), - # Remedy 12 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] + # Remedy 13 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] rdChemReactions.ReactionFromSmarts( "[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+1v3X3:3]-[O+0-0v2X2:4]-[O-1v1X1:5]" ), diff --git a/rdmc/mol.py b/rdmc/mol.py index 5549cb4d..774bf295 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -605,7 +605,6 @@ def FromXYZ( xyz: str, backend: str = "openbabel", header: bool = True, - correctCO: bool = True, sanitize: bool = True, embed_chiral: bool = False, **kwargs, @@ -619,7 +618,6 @@ def FromXYZ( Currently, we only support ``'openbabel'`` and ``'jensen'``. header (bool, optional): If lines of the number of atoms and title are included. Defaults to ``True.`` - correctCO (bool, optional): Whether to correct the CO bond as "[C-]#[O+]". Defaults to ``True``. sanitize (bool): Sanitize the RDKit molecule during conversion. Helpful to set it to ``False`` when reading in TSs. Defaults to ``True``. embed_chiral: ``True`` to embed chiral information. Defaults to ``True``. @@ -642,7 +640,7 @@ def FromXYZ( # Openbabel support read xyz and perceive atom connectivities if backend.lower() == "openbabel": - obmol = parse_xyz_by_openbabel(xyz, correct_CO=correctCO) + obmol = parse_xyz_by_openbabel(xyz) rdmol = cls.FromOBMol(obmol, sanitize=sanitize) if embed_chiral: rdmol.AssignStereochemistryFrom3D() @@ -652,7 +650,7 @@ def FromXYZ( # provides an approach to convert xyz to mol elif backend.lower() == "jensen": mol = parse_xyz_by_jensen( - xyz, correct_CO=correctCO, embed_chiral=embed_chiral, **kwargs + xyz, embed_chiral=embed_chiral, **kwargs ) return cls(mol) @@ -690,7 +688,6 @@ def FromFile( path: str, backend: str = "openbabel", header: bool = True, - correctCO: bool = True, removeHs: bool = False, sanitize: bool = True, sameMol: bool = False, @@ -723,7 +720,6 @@ def FromFile( xyz, backend=backend, header=header, - correctCO=correctCO, sanitize=sanitize, **kwargs, ) diff --git a/rdmc/utils.py b/rdmc/utils.py index c0d684af..f7b0f456 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -56,11 +56,6 @@ "[!$(*#*)&!D1!H3]-&!@[!$(*#*)&!D1&!H3]" ) -# When perceiving molecules, openbabel will always perceive carbon monoxide as [C]=O -# Needs to correct it by [C-]#[O+] -CO_OPENBABEL_PATTERN = ob.OBSmartsPattern() -CO_OPENBABEL_PATTERN.Init("[Cv2X1]=[OX1]") - # Carbene, nitrene, and atomic oxygen templates. RDKit and Openbabel have difficulty # distinguish their multiplicity when input as SMILES or XYZ CARBENE_PATTERN = Chem.MolFromSmarts("[Cv0,Cv1,Cv2,Nv0,Nv1,Ov0]") @@ -457,38 +452,12 @@ def set_obmol_coords(obmol: ob.OBMol, coords: np.array): atom.SetVector(ob.vector3(*coords[atom_idx].tolist())) -def fix_CO_openbabel(obmol: "Openbabel.OBMol", correct_CO: bool = True): - """ - Fix the CO perception issue for openbabel molecule. - - Args: - obmol (Openbabel.OBMol): The Openbabel molecule instance. - correct_CO (bool, optional): Whether to fix this issue. Defaults to True. - """ - if not correct_CO: - return - CO_OPENBABEL_PATTERN.Match(obmol) - for pair in CO_OPENBABEL_PATTERN.GetUMapList(): - obmol.GetBond(*pair).SetBondOrder(3) - for idx in pair: - atom = obmol.GetAtom(idx) - if atom.GetAtomicNum() == 6: - atom.SetSpinMultiplicity(0) - atom.SetFormalCharge(-1) - elif atom.GetAtomicNum() == 8: - atom.SetSpinMultiplicity(0) - atom.SetFormalCharge(+1) - - -def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True): +def parse_xyz_by_openbabel(xyz: str): """ Perceive a xyz str using openbabel and generate the corresponding OBMol. Args: xyz (str): A str in xyz format containing atom positions. - correctCO (bool, optional): It is known that openbabel will parse carbon monoxide - as [C]=O instead of [C-]#[O+]. This function contains - a patch to correct that. Defaults to ``True``. Returns: ob.OBMol: An openbabel molecule from the xyz @@ -522,9 +491,6 @@ def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True): ): obatom.SetSpinMultiplicity(2) - # Correct [C]=O to [C-]#[O+] - fix_CO_openbabel(obmol, correct_CO=correct_CO) - return obmol @@ -620,7 +586,6 @@ def parse_xyz_by_jensen( allow_charged_fragments: bool = False, use_huckel: bool = False, embed_chiral: bool = True, - correct_CO: bool = True, use_atom_maps: bool = False, force_rdmc: bool = False, **kwargs, @@ -634,10 +599,6 @@ def parse_xyz_by_jensen( allow_charged_fragments: ``True`` for charged fragment, ``False`` for radical. Defaults to False. use_huckel: ``True`` to use extended Huckel bond orders to locate bonds. Defaults to False. embed_chiral: ``True`` to embed chiral information. Defaults to True. - correctCO (bool, optional): Defaults to ``True``. - In order to get correct RDKit molecule for carbon monoxide - ([C-]#[O+]), allow_charged_fragments should be forced to ``True``. - This function contains a patch to correct that. use_atom_maps(bool, optional): ``True`` to set atom map numbers to the molecule. Defaults to ``False``. force_rdmc (bool, optional): Defaults to ``False``. In rare case, we may hope to use a tailored version of the Jensen XYZ parser, other than the one available in RDKit. @@ -657,7 +618,6 @@ def parse_xyz_by_jensen( use_huckel=use_huckel, embed_chiral=embed_chiral, use_atom_maps=use_atom_maps, - correct_CO=correct_CO, ) # Version >= 2022.09.1 @@ -684,13 +644,6 @@ def parse_xyz_by_jensen( useHueckel=use_huckel, charge=charge, ) - # A force correction for CO - if ( - correct_CO - and mol.GetNumAtoms() == 2 - and {atom.GetAtomicNum() for atom in mol.GetAtoms()} == {6, 8} - ): - allow_charged_fragments = True rdDetermineBonds.DetermineBondOrders( mol, charge=charge, diff --git a/test/test_fix.py b/test/test_fix.py index b733b2a0..1593f53f 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -28,6 +28,7 @@ def test_fix_sanitize_ok(smi, exp_smi): @pytest.mark.parametrize( "smi, exp_smi", [ + ("[C]#[O]", "[C-]#[O+]"), ("[NH3][O]", "[NH3+][O-]"), ("[CH2][NH3]", "[CH2-][NH3+]"), ("[C]#[NH]", "[C-]#[NH+]"), diff --git a/test/test_utils.py b/test/test_utils.py index 011e76bc..5c7d7d43 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,10 +22,6 @@ @pytest.fixture(params=[ - ('[C-]#[O+]', """2 - -C 0.559061 0.000000 0.000000 -O -0.559061 0.000000 0.000000"""), ('C', """5 C 0.005119 -0.010620 0.006014 From f23a30a680096dc2f34ab48ffce83f4b3da0ca56 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 16:25:45 -0400 Subject: [PATCH 25/33] Minor: fix xyz2mol format --- rdmc/external/xyz2mol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rdmc/external/xyz2mol.py b/rdmc/external/xyz2mol.py index 4f2a94b6..c9d3499b 100644 --- a/rdmc/external/xyz2mol.py +++ b/rdmc/external/xyz2mol.py @@ -536,7 +536,6 @@ def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, if not mols: mols = [mol] # For some cases, resonance structure supplier creates Nones - return mols From a61c99aa2d2bc89e53e57623f6139bbe6ec07d55 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 22:25:28 -0400 Subject: [PATCH 26/33] Use C=[O+][O-] as default resonance structure for criegee intermediates --- rdmc/fix.py | 28 ++++++++++++++-------------- test/test_fix.py | 4 ++-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 58f10dbc..de954d86 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -58,39 +58,39 @@ "[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1" ">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1" ), - # Remedy 12 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] - rdChemReactions.ReactionFromSmarts( - "[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+1v3X3:1]-[O+0-0v2X2:2]-[O-1v1X1:3]" - ), - # Remedy 13 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] - rdChemReactions.ReactionFromSmarts( - "[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+1v3X3:3]-[O+0-0v2X2:4]-[O-1v1X1:5]" - ), ] ZWITTERION_REMEDIES = [ - # Remedy 1 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-] + # Remedy 1 - criegee Intermediate: R[C](R)O[O] to RC=(R)[O+][O-] + rdChemReactions.ReactionFromSmarts( + "[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+0-0v4X3:1]=[O+1v3X2:2]-[O-1v1X1:3]" + ), + # Remedy 2 - criegee Intermediate: [C]-C=C(R)O[O] to C=C-C=(R)[O+][O-] + rdChemReactions.ReactionFromSmarts( + "[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+0-0v4X3:3]=[O+1v3X2:4]-[O-1v1X1:5]" + ), + # Remedy 3 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), - # Remedy 2 - criegee like molecule: R[N+](R)(R)-[C-](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-] + # Remedy 4 - criegee like molecule: R[N+](R)(R)-[C-](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+1v4X4:1]-[C-1v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), - # Remedy 3 - ammonium + carboxylic: ([N]R4.C(=O)[O]) to ([N+]R4.C(=O)[O-]) + # Remedy 5 - ammonium + carboxylic: ([N]R4.C(=O)[O]) to ([N+]R4.C(=O)[O-]) rdChemReactions.ReactionFromSmarts( "([N+0-0v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O+0-0v1X1:4])>>([N+1v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O-1v1X1:4])" ), - # Remedy 4 - ammonium + phosphoric: ([N]R4.P(=O)[O]) to ([N+]R4.P(=O)[O-]) + # Remedy 6 - ammonium + phosphoric: ([N]R4.P(=O)[O]) to ([N+]R4.P(=O)[O-]) rdChemReactions.ReactionFromSmarts( "([N+0-0v4X4:1].[P+0-0v5X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[P+0-0v5X4:2]-[O-1v1X1:3])" ), - # Remedy 5 - ammonium + sulphuric: ([N]R4.S(=O)(=O)[O]) to ([N+]R4.S(=O)(=O)[O-]) + # Remedy 7 - ammonium + sulphuric: ([N]R4.S(=O)(=O)[O]) to ([N+]R4.S(=O)(=O)[O-]) rdChemReactions.ReactionFromSmarts( "([N+0-0v4X4:1].[S+0-0v6X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[S+0-0v6X4:2]-[O-1v1X1:3])" ), - # Remedy 6 - ammonium + carbonyl in ring: ([N]R4.C=O) to ([N+]R4.[C.]-[O-]) + # Remedy 8 - ammonium + carbonyl in ring: ([N]R4.C=O) to ([N+]R4.[C.]-[O-]) rdChemReactions.ReactionFromSmarts( "([N+0-0v4X4:1].[C+0-0v4X3R:2]=[O+0-0v2X1:3])>>([N+1v4X4:1].[C+0-0v3X3R:2]-[O-1v1X1:3])" ), diff --git a/test/test_fix.py b/test/test_fix.py index 1593f53f..ac7c9c93 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -16,8 +16,8 @@ ("O=O", "[O][O]"), ("[C]=O", "[C-]#[O+]"), ("CS(C)([O])[O]", "CS(C)(=O)=O"), - ("[CH2]O[O]", "[CH2+]O[O-]"), - ("[CH2]C=CO[O]", "C=C[CH+]O[O-]"), + ("[CH2]O[O]", "C=[O+][O-]"), + ("[CH2]C=CO[O]", "C=CC=[O+][O-]"), ], ) def test_fix_sanitize_ok(smi, exp_smi): From 98ff29f1f7d765f217f086681a43cb2bd27dace6 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 1 Nov 2023 22:25:42 -0400 Subject: [PATCH 27/33] Add GetHeavyAtoms for mol --- rdmc/mol.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rdmc/mol.py b/rdmc/mol.py index 774bf295..858dbd8c 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -761,6 +761,15 @@ def GetAtoms(self) -> list: """ return [self.GetAtomWithIdx(idx) for idx in range(self.GetNumAtoms())] + def GetHeavyAtoms(self) -> list: + """ + Get heavy atoms of the molecule with the order consistent with the atom indexes. + + Returns: + list: A list of heavy atoms. + """ + return [atom for atom in self.GetAtoms() if atom.GetAtomicNum() != 1] + def GetAtomicNumbers(self): """ Get the Atomic numbers of the molecules. The atomic numbers are sorted by the atom indexes. From 992e12848bfa59375dbca00f9e86df82e53abfe9 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 00:23:42 -0400 Subject: [PATCH 28/33] Add functions to fix molecules with oxonium Openbabel and Jensen XYZ perception algorithms do not perceive oxonium oxygen correctly. This commit try to fix it by adding the missing bonds and correcting the charge --- rdmc/fix.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index de954d86..49ecda42 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -8,8 +8,10 @@ from functools import reduce from typing import List +import numpy as np + from rdmc import RDKitMol -from rdkit.Chem import rdChemReactions, rdmolops +from rdkit.Chem import BondType, rdChemReactions, rdmolops RECOMMEND_REMEDIES = [ @@ -320,3 +322,67 @@ def fix_mol( mol = mol.RenumberAtoms() return mol + + +def find_oxonium_bonds(mol: "RDKitMol") -> List[tuple]: + """ + Find the potential oxonium atom. + + Args: + mol (RDKitMol): The molecule to be fixed. + + Returns: + List[tuple]: a list of (oxygen atom index, the other atom index). + """ + heavy_idxs = [atom.GetIdx() for atom in mol.GetHeavyAtoms()] + oxygen_idxs = [ + atom.GetIdx() for atom in mol.GetHeavyAtoms() if atom.GetAtomicNum() == 8 + ] + + if len(oxygen_idxs) == 0: + return [] + + dist_mat = mol.GetDistanceMatrix() + dist_mat[oxygen_idxs, oxygen_idxs] = 100 # Set the self distance to a large number + + infer_conn_mat = (dist_mat[oxygen_idxs][:, heavy_idxs] <= 1.8).astype(int) + actual_conn_mat = mol.GetAdjacencyMatrix()[oxygen_idxs][:, heavy_idxs] + + # Find potentially missing bonds + raw_miss_bonds = np.transpose(np.where((infer_conn_mat - actual_conn_mat) == 1)) + miss_bonds = np.unique(raw_miss_bonds, axis=0).tolist() + + return [ + (oxygen_idxs[miss_bond[0]], heavy_idxs[miss_bond[1]]) + for miss_bond in miss_bonds + ] + + +def fix_oxonium_bonds(mol: "RDKitMol"): + """ + Fix the oxonium atom. Openbabel and Jensen perception algorithm do not perceive the oxonium atom correctly. + This is a fix to detect if the molecule contains oxonium atom and fix it. + """ + oxonium_bonds = find_oxonium_bonds(mol) + + if len(oxonium_bonds) == 0: + return mol + + mol = mol.Copy() + for miss_bond in oxonium_bonds: + mol.AddBond(*miss_bond, order=BondType.SINGLE) + + # Usually the connected atom is a radical site + # So, update the number of radical electrons afterward + rad_atom = mol.GetAtomWithIdx(miss_bond[1]) + if rad_atom.GetNumRadicalElectrons() > 0: + rad_atom.SetNumRadicalElectrons(rad_atom.GetNumRadicalElectrons() - 1) + + # This remedy is only used for oxonium + remedies = [ + rdChemReactions.ReactionFromSmarts( + "[O+0-0v3X3:1]-[O+0-0v1X1:2]>>[O+1v3X3:1]-[O-1v1X1:2]" + ), + ] + + return fix_mol(mol, remedies=remedies) From d456a7f0bf292eb7f6966e19a49092f476ca1c1c Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 11:30:43 -0400 Subject: [PATCH 29/33] Improve fix_oxonium_bonds 1. Add an optional argument to control the threshold of missing bond perception 2. Add an optional argument to allow not sanitization for debugging and testing 3. Add an unit test for fix_oxonium_bonds --- rdmc/fix.py | 37 ++++++++++++++++++++++++++++++++----- test/test_fix.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 49ecda42..0eb16085 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -294,6 +294,7 @@ def fix_mol( max_attempts (int, optional): The maximum number of attempts to fix the molecule. Defaults to ``10``. sanitize (bool, optional): Whether to sanitize the molecule after the fix. Defaults to ``True``. + Using ``False`` is only recommended for debugging and testing. fix_spin_multiplicity (bool, optional): Whether to fix the spin multiplicity of the molecule. Defaults to ``False``. mult (int, optional): The desired spin multiplicity. Defaults to ``0``, which means the lowest possible @@ -324,12 +325,16 @@ def fix_mol( return mol -def find_oxonium_bonds(mol: "RDKitMol") -> List[tuple]: +def find_oxonium_bonds( + mol: "RDKitMol", + threshold: float = 1.65, +) -> List[tuple]: """ Find the potential oxonium atom. Args: mol (RDKitMol): The molecule to be fixed. + threshold (float, optional): The threshold to determine if two atoms are connected. Returns: List[tuple]: a list of (oxygen atom index, the other atom index). @@ -345,7 +350,9 @@ def find_oxonium_bonds(mol: "RDKitMol") -> List[tuple]: dist_mat = mol.GetDistanceMatrix() dist_mat[oxygen_idxs, oxygen_idxs] = 100 # Set the self distance to a large number - infer_conn_mat = (dist_mat[oxygen_idxs][:, heavy_idxs] <= 1.8).astype(int) + # A detailed check may be done by element type + # for now we will use the threshold based on the longest C-O bond 1.65 A + infer_conn_mat = (dist_mat[oxygen_idxs][:, heavy_idxs] <= threshold).astype(int) actual_conn_mat = mol.GetAdjacencyMatrix()[oxygen_idxs][:, heavy_idxs] # Find potentially missing bonds @@ -358,12 +365,25 @@ def find_oxonium_bonds(mol: "RDKitMol") -> List[tuple]: ] -def fix_oxonium_bonds(mol: "RDKitMol"): +def fix_oxonium_bonds( + mol: "RDKitMol", + threshold: float = 1.65, + sanitize: bool = True, +) -> 'RDKitMol': """ Fix the oxonium atom. Openbabel and Jensen perception algorithm do not perceive the oxonium atom correctly. This is a fix to detect if the molecule contains oxonium atom and fix it. + + Args: + mol (RDKitMol): The molecule to be fixed. + threshold (float, optional): The threshold to determine if two atoms are connected. + sanitize (bool, optional): Whether to sanitize the molecule after the fix. Defaults to ``True``. + Using ``False`` is only recommended for debugging and testing. + + Returns: + RDKitMol: The fixed molecule. """ - oxonium_bonds = find_oxonium_bonds(mol) + oxonium_bonds = find_oxonium_bonds(mol, threshold=threshold) if len(oxonium_bonds) == 0: return mol @@ -380,9 +400,16 @@ def fix_oxonium_bonds(mol: "RDKitMol"): # This remedy is only used for oxonium remedies = [ + # Remedy 1 - R[O](R)[O] to R[O+](R)[O-] + # This is a case combining two radicals R-O-[O] and [R] rdChemReactions.ReactionFromSmarts( "[O+0-0v3X3:1]-[O+0-0v1X1:2]>>[O+1v3X3:1]-[O-1v1X1:2]" ), + # Remedy 2 - R[O](R)C(R)=O to R[O+](R)[C](R)[O-] + # This is a case combining a closed shell ROR with a radical R[C]=O + rdChemReactions.ReactionFromSmarts( + "[O+0-0v3X3:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[O+1v3X3:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" + ), ] - return fix_mol(mol, remedies=remedies) + return fix_mol(mol, remedies=remedies, sanitize=sanitize) diff --git a/test/test_fix.py b/test/test_fix.py index ac7c9c93..c998c812 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -7,7 +7,7 @@ import pytest from rdmc import RDKitMol -from rdmc.fix import ALL_REMEDIES, fix_mol +from rdmc.fix import ALL_REMEDIES, fix_mol, fix_oxonium_bonds @pytest.mark.parametrize( @@ -69,3 +69,40 @@ def test_renumber_after_fix(): mol_fix = fix_mol(mol.Copy(quickCopy=True), renumber_atoms=True) assert mol.GetAtomMapNumbers() == mol_fix.GetAtomMapNumbers() assert mol.GetAtomicNumbers() == mol_fix.GetAtomicNumbers() + + +@pytest.mark.parametrize( + 'xyz, exp_smi', + [ + ( + """O -1.2607590000 0.7772420000 0.6085820000 +C -0.1650470000 -2.3539430000 2.2668210000 +C -0.4670120000 -2.1947580000 0.7809780000 +C 0.5724080000 -1.3963940000 -0.0563730000 +C 1.9166170000 -2.1487680000 -0.0973880000 +C 0.0355110000 -1.2164630000 -1.4811920000 +C 0.8592950000 -0.0701790000 0.6147050000 +O 1.6293140000 0.1954080000 1.4668300000 +O 0.0710230000 1.0551410000 0.0304340000 +C 0.5008030000 2.3927920000 0.4116770000 +H -0.9212150000 -2.9917470000 2.7288580000 +H -0.1856660000 -1.3928280000 2.7821170000 +H 0.8077150000 -2.8148360000 2.4472520000 +H -1.4311160000 -1.7082160000 0.6552790000 +H -0.5276310000 -3.1794610000 0.3074300000 +H 1.7489410000 -3.1449730000 -0.5091360000 +H 2.3570430000 -2.2480580000 0.8923780000 +H 2.6337360000 -1.6301710000 -0.7383130000 +H -0.0590770000 -2.2002990000 -1.9397630000 +H 0.7068050000 -0.6180050000 -2.0971060000 +H -0.9435140000 -0.7413070000 -1.4727710000 +H 0.4382590000 2.4894460000 1.4913270000 +H -0.1807540000 3.0525390000 -0.1120870000 +H 1.5196670000 2.5089310000 0.0492140000""", + 'CCC(C)(C)C(=O)[O+](C)[O-]' + ) + ] +) +def test_fix_oxonium(xyz, exp_smi): + mol = RDKitMol.FromXYZ(xyz, sanitize=False, header=False) + assert fix_oxonium_bonds(mol).ToSmiles() == exp_smi From 60b2f91eaea052f1ba4ceb38e6ae56e3baa2217f Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 12:51:35 -0400 Subject: [PATCH 30/33] Bugfix: avoid double counting for O-O oxonium --- rdmc/fix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index 0eb16085..cc9fc778 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -390,7 +390,11 @@ def fix_oxonium_bonds( mol = mol.Copy() for miss_bond in oxonium_bonds: - mol.AddBond(*miss_bond, order=BondType.SINGLE) + try: + mol.AddBond(*miss_bond, order=BondType.SINGLE) + except RuntimeError: + # Oxygen may get double counted + continue # Usually the connected atom is a radical site # So, update the number of radical electrons afterward From 572223a533b9af0a3e833b3ec4223ab2e702e31b Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 13:38:13 -0400 Subject: [PATCH 31/33] Improve interact_irc 1. Add the `bothway` argument to avoid mistakenly changing the geometry sequence. 2. Allow passing additional arguments to the viewer --- rdmc/external/logparser/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rdmc/external/logparser/base.py b/rdmc/external/logparser/base.py index 32e46e8e..09d6fb8e 100644 --- a/rdmc/external/logparser/base.py +++ b/rdmc/external/logparser/base.py @@ -995,7 +995,9 @@ def interact_irc(self, sanitize: bool = False, converged: bool = True, backend: str = 'openbabel', + bothway: bool = False, continuous_update: bool = False, + **kwargs, ) -> interact: """ Create a IPython interactive widget to investigate the IRC results. @@ -1009,7 +1011,7 @@ def interact_irc(self, Returns: interact """ - mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend) + mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend, bothway=bothway) sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())] xyzs = self.get_xyzs(converged=converged) y_params = self.get_scf_energies(converged=converged) @@ -1024,7 +1026,7 @@ def interact_irc(self, ylabel = 'E(SCF) [kcal/mol]' def visual(idx): - mol_viewer(sdfs[idx - 1], 'sdf').update() + mol_viewer(sdfs[idx - 1], 'sdf', **kwargs).update() ax = plt.axes() ax.plot(x_params, y_params) ax.set(xlabel=xlabel, ylabel=ylabel) From 7c760460cf51e6562649d12cdfd9e3b468259b64 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 15:52:55 -0400 Subject: [PATCH 32/33] Add a new fix for oxonium bonds --- rdmc/fix.py | 7 ++++++- test/test_fix.py | 27 +++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/rdmc/fix.py b/rdmc/fix.py index cc9fc778..32e592bd 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -369,7 +369,7 @@ def fix_oxonium_bonds( mol: "RDKitMol", threshold: float = 1.65, sanitize: bool = True, -) -> 'RDKitMol': +) -> "RDKitMol": """ Fix the oxonium atom. Openbabel and Jensen perception algorithm do not perceive the oxonium atom correctly. This is a fix to detect if the molecule contains oxonium atom and fix it. @@ -414,6 +414,11 @@ def fix_oxonium_bonds( rdChemReactions.ReactionFromSmarts( "[O+0-0v3X3:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[O+1v3X3:1]-[C+0-0v3X3:2]-[O-1v1X1:3]" ), + # Remedy 3 - R[O](R)[C](R)R to R[O+](R)[C-](R)R + # This is a case combining a radical R[C](R)(R) with a radical R[O] + rdChemReactions.ReactionFromSmarts( + "[O+0-0v3X3:1]-[C+0-0v3X3:2]>>[O+1v3X3:1]-[C-1v3X3:2]" + ), ] return fix_mol(mol, remedies=remedies, sanitize=sanitize) diff --git a/test/test_fix.py b/test/test_fix.py index c998c812..d3bbc201 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -72,7 +72,7 @@ def test_renumber_after_fix(): @pytest.mark.parametrize( - 'xyz, exp_smi', + "xyz, exp_smi", [ ( """O -1.2607590000 0.7772420000 0.6085820000 @@ -99,9 +99,28 @@ def test_renumber_after_fix(): H 0.4382590000 2.4894460000 1.4913270000 H -0.1807540000 3.0525390000 -0.1120870000 H 1.5196670000 2.5089310000 0.0492140000""", - 'CCC(C)(C)C(=O)[O+](C)[O-]' - ) - ] + "CCC(C)(C)C(=O)[O+](C)[O-]", + ), + ( + """O -1.17118700 -1.50146090 -2.47413980 +H -2.01769360 -1.92927720 -2.63341740 +C -1.98485810 2.24098580 -2.21737070 +C -1.70946220 0.80773220 -1.77051860 +C -1.32477280 -0.00677400 -2.94764940 +H -0.87667130 -1.54124320 -1.46319570 +H -0.30275440 0.21066780 -3.26524910 +N -0.71008250 0.93599160 -0.58681770 +C -0.18358680 -0.06896500 0.08764570 +O -0.33148990 -1.27959420 -0.16793930 +H -2.82713830 2.25060270 -2.90397100 +H -1.11829350 2.64922810 -2.74266840 +H -2.21700900 2.89116810 -1.36932880 +H -2.60626880 0.39545810 -1.29598380 +H -0.49062240 1.86220920 -0.24896060 +H 0.43540870 0.21959280 0.94605120""", + "CC([CH-][OH2+])NC=O", + ), + ], ) def test_fix_oxonium(xyz, exp_smi): mol = RDKitMol.FromXYZ(xyz, sanitize=False, header=False) From aebc2ae8f585f134f02927d4ab137ec2a0a105d9 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 2 Nov 2023 17:25:11 -0400 Subject: [PATCH 33/33] Add a filter for items from ResonanceMolSupplier For unknown reason, it may yield None object unexpectedly. The commit add a filter for its output. --- rdmc/resonance/resonance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdmc/resonance/resonance.py b/rdmc/resonance/resonance.py index 53a233ca..c01d4451 100644 --- a/rdmc/resonance/resonance.py +++ b/rdmc/resonance/resonance.py @@ -71,7 +71,7 @@ def generate_radical_resonance_structures( if kekulize: flags |= Chem.KEKULE_ALL suppl = Chem.ResonanceMolSupplier(mol_copy._mol, flags=flags) - res_mols = [RDKitMol(RWMol(mol)) for mol in suppl] + res_mols = [RDKitMol(RWMol(mol)) for mol in suppl if mol is not None] # Post-processing resonance structures cleaned_mols = []