Skip to content

Commit

Permalink
Deprecate fix_CO in the from_xyz workflow and add it to fix_mol
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoruiDong committed Nov 1, 2023
1 parent b981120 commit 57d03e3
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 69 deletions.
26 changes: 15 additions & 11 deletions rdmc/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,52 @@
rdChemReactions.ReactionFromSmarts(
"[O+0-0v2X1:1]=[C+0-0v2X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]"
),
# Remedy 2 - Oxygen Molecule: O=O to [O]-[O]
# Remedy 2 - Carbon monoxide: [C]=O to [C-]#[O+]
rdChemReactions.ReactionFromSmarts(
"[O+0-0v3X1:1]#[C+0-0v3X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]"
),
# Remedy 3 - Oxygen Molecule: O=O to [O]-[O]
rdChemReactions.ReactionFromSmarts(
"[O+0-0v2X1:1]=[O+0-0v2X1:2]>>[O+0-0v1X1:1]-[O+0-0v1X1:2]"
),
# Remedy 3 - isocyanide: R[N]#[C] to R[N+]#[C-]
# Remedy 4 - isocyanide: R[N]#[C] to R[N+]#[C-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X2:1]#[C+0-0v3X1:2]>>[N+v4X2:1]#[C-v3X1:2]"
),
# Remedy 4 - azide: RN=N=[N] to RN=[N+]=[N-]
# Remedy 5 - azide: RN=N=[N] to RN=[N+]=[N-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v3X2:1]=[N+0-0v4X2:2]=[N+0-0v2X1:3]>>[N+0-0v3X2:1]=[N+1v4X2:2]=[N-1v2X1:3]"
),
# Remedy 5 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-]
# Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]"
),
# Remedy 6 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R
# Remedy 7 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]"
),
# Remedy 7 - amine radical: RN(R)=C to RN(R)-[C]
# Remedy 8 - amine radical: RN(R)=C to RN(R)-[C]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]"
),
# Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O]
# Remedy 9 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]"
),
# Remedy 9 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O)
# Remedy 10 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O)
rdChemReactions.ReactionFromSmarts(
"[S+0-0v4X4:1](-[O+0-0v1X1:2])-[O+0-0v1X1:3]>>[S+0-0v6X4:1](=[O+0-0v2X1:2])=[O+0-0v2X1:3]"
),
# Remedy 10 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1
# Remedy 11 - Triazine: C1=N=C=N=C=N=1 to c1ncncn1
rdChemReactions.ReactionFromSmarts(
"[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1"
">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1"
),
# Remedy 11 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-]
# Remedy 12 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+1v3X3:1]-[O+0-0v2X2:2]-[O-1v1X1:3]"
),
# Remedy 12 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-]
# Remedy 13 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+1v3X3:3]-[O+0-0v2X2:4]-[O-1v1X1:5]"
),
Expand Down
8 changes: 2 additions & 6 deletions rdmc/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,6 @@ def FromXYZ(
xyz: str,
backend: str = "openbabel",
header: bool = True,
correctCO: bool = True,
sanitize: bool = True,
embed_chiral: bool = False,
**kwargs,
Expand All @@ -619,7 +618,6 @@ def FromXYZ(
Currently, we only support ``'openbabel'`` and ``'jensen'``.
header (bool, optional): If lines of the number of atoms and title are included.
Defaults to ``True``.
correctCO (bool, optional): Whether to correct the CO bond as "[C-]#[O+]". Defaults to ``True``.
sanitize (bool): Sanitize the RDKit molecule during conversion. Helpful to set it to ``False``
when reading in TSs. Defaults to ``True``.
embed_chiral: ``True`` to embed chiral information. Defaults to ``False``.
Expand All @@ -642,7 +640,7 @@ def FromXYZ(

# Openbabel supports reading xyz and perceiving atom connectivities
if backend.lower() == "openbabel":
obmol = parse_xyz_by_openbabel(xyz, correct_CO=correctCO)
obmol = parse_xyz_by_openbabel(xyz)
rdmol = cls.FromOBMol(obmol, sanitize=sanitize)
if embed_chiral:
rdmol.AssignStereochemistryFrom3D()
Expand All @@ -652,7 +650,7 @@ def FromXYZ(
# provides an approach to convert xyz to mol
elif backend.lower() == "jensen":
mol = parse_xyz_by_jensen(
xyz, correct_CO=correctCO, embed_chiral=embed_chiral, **kwargs
xyz, embed_chiral=embed_chiral, **kwargs
)
return cls(mol)

Expand Down Expand Up @@ -690,7 +688,6 @@ def FromFile(
path: str,
backend: str = "openbabel",
header: bool = True,
correctCO: bool = True,
removeHs: bool = False,
sanitize: bool = True,
sameMol: bool = False,
Expand Down Expand Up @@ -723,7 +720,6 @@ def FromFile(
xyz,
backend=backend,
header=header,
correctCO=correctCO,
sanitize=sanitize,
**kwargs,
)
Expand Down
49 changes: 1 addition & 48 deletions rdmc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,6 @@
"[!$(*#*)&!D1!H3]-&!@[!$(*#*)&!D1&!H3]"
)

# When perceiving molecules, openbabel will always perceive carbon monoxide as [C]=O
# Needs to correct it by [C-]#[O+]
CO_OPENBABEL_PATTERN = ob.OBSmartsPattern()
CO_OPENBABEL_PATTERN.Init("[Cv2X1]=[OX1]")

# Carbene, nitrene, and atomic oxygen templates. RDKit and Openbabel have difficulty
# distinguishing their multiplicity when input as SMILES or XYZ
CARBENE_PATTERN = Chem.MolFromSmarts("[Cv0,Cv1,Cv2,Nv0,Nv1,Ov0]")
Expand Down Expand Up @@ -457,38 +452,12 @@ def set_obmol_coords(obmol: ob.OBMol, coords: np.array):
atom.SetVector(ob.vector3(*coords[atom_idx].tolist()))


def fix_CO_openbabel(obmol: "Openbabel.OBMol", correct_CO: bool = True):
    """
    Rewrite every carbon monoxide match in an Openbabel molecule as [C-]#[O+].

    The molecule is modified in place: each bond matched by
    ``CO_OPENBABEL_PATTERN`` is set to a triple bond, the carbon gets a -1
    formal charge, the oxygen gets a +1 formal charge, and the spin
    multiplicity of both atoms is reset to 0.

    Args:
        obmol (Openbabel.OBMol): The Openbabel molecule instance.
        correct_CO (bool, optional): Whether to fix this issue. When ``False``
            the molecule is left untouched. Defaults to True.
    """
    if correct_CO:
        # Formal charge assigned to each element of a matched C/O pair.
        formal_charges = {6: -1, 8: +1}
        CO_OPENBABEL_PATTERN.Match(obmol)
        for matched_pair in CO_OPENBABEL_PATTERN.GetUMapList():
            obmol.GetBond(*matched_pair).SetBondOrder(3)
            for atom_idx in matched_pair:
                matched_atom = obmol.GetAtom(atom_idx)
                new_charge = formal_charges.get(matched_atom.GetAtomicNum())
                if new_charge is not None:
                    matched_atom.SetSpinMultiplicity(0)
                    matched_atom.SetFormalCharge(new_charge)


def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True):
def parse_xyz_by_openbabel(xyz: str):
"""
Perceive a xyz str using openbabel and generate the corresponding OBMol.
Args:
xyz (str): A str in xyz format containing atom positions.
correctCO (bool, optional): It is known that openbabel will parse carbon monoxide
as [C]=O instead of [C-]#[O+]. This function contains
a patch to correct that. Defaults to ``True``.
Returns:
ob.OBMol: An openbabel molecule from the xyz
Expand Down Expand Up @@ -522,9 +491,6 @@ def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True):
):
obatom.SetSpinMultiplicity(2)

# Correct [C]=O to [C-]#[O+]
fix_CO_openbabel(obmol, correct_CO=correct_CO)

return obmol


Expand Down Expand Up @@ -620,7 +586,6 @@ def parse_xyz_by_jensen(
allow_charged_fragments: bool = False,
use_huckel: bool = False,
embed_chiral: bool = True,
correct_CO: bool = True,
use_atom_maps: bool = False,
force_rdmc: bool = False,
**kwargs,
Expand All @@ -634,10 +599,6 @@ def parse_xyz_by_jensen(
allow_charged_fragments: ``True`` for charged fragment, ``False`` for radical. Defaults to False.
use_huckel: ``True`` to use extended Huckel bond orders to locate bonds. Defaults to False.
embed_chiral: ``True`` to embed chiral information. Defaults to True.
correctCO (bool, optional): Defaults to ``True``.
In order to get correct RDKit molecule for carbon monoxide
([C-]#[O+]), allow_charged_fragments should be forced to ``True``.
This function contains a patch to correct that.
use_atom_maps(bool, optional): ``True`` to set atom map numbers to the molecule. Defaults to ``False``.
force_rdmc (bool, optional): Defaults to ``False``. In rare case, we may hope to use a tailored
version of the Jensen XYZ parser, other than the one available in RDKit.
Expand All @@ -657,7 +618,6 @@ def parse_xyz_by_jensen(
use_huckel=use_huckel,
embed_chiral=embed_chiral,
use_atom_maps=use_atom_maps,
correct_CO=correct_CO,
)

# Version >= 2022.09.1
Expand All @@ -684,13 +644,6 @@ def parse_xyz_by_jensen(
useHueckel=use_huckel,
charge=charge,
)
# A force correction for CO
if (
correct_CO
and mol.GetNumAtoms() == 2
and {atom.GetAtomicNum() for atom in mol.GetAtoms()} == {6, 8}
):
allow_charged_fragments = True
rdDetermineBonds.DetermineBondOrders(
mol,
charge=charge,
Expand Down
1 change: 1 addition & 0 deletions test/test_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_fix_sanitize_ok(smi, exp_smi):
@pytest.mark.parametrize(
"smi, exp_smi",
[
("[C]#[O]", "[C-]#[O+]"),
("[NH3][O]", "[NH3+][O-]"),
("[CH2][NH3]", "[CH2-][NH3+]"),
("[C]#[NH]", "[C-]#[NH+]"),
Expand Down
4 changes: 0 additions & 4 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@


@pytest.fixture(params=[
('[C-]#[O+]', """2
C 0.559061 0.000000 0.000000
O -0.559061 0.000000 0.000000"""),
('C', """5
C 0.005119 -0.010620 0.006014
Expand Down

0 comments on commit 57d03e3

Please sign in to comment.