diff --git a/rdmc/fix.py b/rdmc/fix.py index 3cd9d176..58f10dbc 100644 --- a/rdmc/fix.py +++ b/rdmc/fix.py @@ -17,48 +17,52 @@ rdChemReactions.ReactionFromSmarts( "[O+0-0v2X1:1]=[C+0-0v2X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]" ), - # Remedy 2 - Oxygen Molecule: O=O to [O]-[O] + # Remedy 2 - Carbon monoxide: [C]#[O] to [C-]#[O+] + rdChemReactions.ReactionFromSmarts( + "[O+0-0v3X1:1]#[C+0-0v3X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]" + ), + # Remedy 3 - Oxygen Molecule: O=O to [O]-[O] rdChemReactions.ReactionFromSmarts( "[O+0-0v2X1:1]=[O+0-0v2X1:2]>>[O+0-0v1X1:1]-[O+0-0v1X1:2]" ), - # Remedy 3 - isocyanide: R[N]#[C] to R[N+]#[C-] + # Remedy 4 - isocyanide: R[N]#[C] to R[N+]#[C-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X2:1]#[C+0-0v3X1:2]>>[N+v4X2:1]#[C-v3X1:2]" ), - # Remedy 4 - azide: RN=N=[N] to RN=[N+]=[N-] + # Remedy 5 - azide: RN=N=[N] to RN=[N+]=[N-] rdChemReactions.ReactionFromSmarts( "[N+0-0v3X2:1]=[N+0-0v4X2:2]=[N+0-0v2X1:3]>>[N+0-0v3X2:1]=[N+1v4X2:2]=[N-1v2X1:3]" ), - # Remedy 5 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] + # Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]" ), - # Remedy 6 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R + # Remedy 7 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R rdChemReactions.ReactionFromSmarts( "[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]" ), - # Remedy 7 - amine radical: RN(R)=C to RN(R)-[C] + # Remedy 8 - amine radical: RN(R)=C to RN(R)-[C] rdChemReactions.ReactionFromSmarts( "[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]" ), - # Remedy 8 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O] + # Remedy 9 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O] rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]" ), - # Remedy 9 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O) + # Remedy 10 - sulphuric bi-radicals: 
R[S](R)(-[O])-[O] to R[S](R)(=O)(=O) rdChemReactions.ReactionFromSmarts( "[S+0-0v4X4:1](-[O+0-0v1X1:2])-[O+0-0v1X1:3]>>[S+0-0v6X4:1](=[O+0-0v2X1:2])=[O+0-0v2X1:3]" ), - # Remedy 10 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1 + # Remedy 11 - Triazinane: C1=N=C=N=C=N=1 to c1ncncn1 rdChemReactions.ReactionFromSmarts( "[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1" ">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1" ), - # Remedy 11 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] + # Remedy 12 - peroxide biradical: R[C](R)O[O] to R[C+](R)O[O-] rdChemReactions.ReactionFromSmarts( "[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+1v3X3:1]-[O+0-0v2X2:2]-[O-1v1X1:3]" ), - # Remedy 12 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] + # Remedy 13 - conjugate peroxide biradical: [C]-C=C(R)O[O] to C=C-[C+](R)O[O-] rdChemReactions.ReactionFromSmarts( "[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+1v3X3:3]-[O+0-0v2X2:4]-[O-1v1X1:5]" ), diff --git a/rdmc/mol.py b/rdmc/mol.py index 5549cb4d..774bf295 100644 --- a/rdmc/mol.py +++ b/rdmc/mol.py @@ -605,7 +605,6 @@ def FromXYZ( xyz: str, backend: str = "openbabel", header: bool = True, - correctCO: bool = True, sanitize: bool = True, embed_chiral: bool = False, **kwargs, @@ -619,7 +618,6 @@ def FromXYZ( Currently, we only support ``'openbabel'`` and ``'jensen'``. header (bool, optional): If lines of the number of atoms and title are included. Defaults to ``True.`` - correctCO (bool, optional): Whether to correct the CO bond as "[C-]#[O+]". Defaults to ``True``. sanitize (bool): Sanitize the RDKit molecule during conversion. Helpful to set it to ``False`` when reading in TSs. Defaults to ``True``. embed_chiral: ``True`` to embed chiral information. Defaults to ``True``. 
@@ -642,7 +640,7 @@ def FromXYZ( # Openbabel support read xyz and perceive atom connectivities if backend.lower() == "openbabel": - obmol = parse_xyz_by_openbabel(xyz, correct_CO=correctCO) + obmol = parse_xyz_by_openbabel(xyz) rdmol = cls.FromOBMol(obmol, sanitize=sanitize) if embed_chiral: rdmol.AssignStereochemistryFrom3D() @@ -652,7 +650,7 @@ def FromXYZ( # provides an approach to convert xyz to mol elif backend.lower() == "jensen": mol = parse_xyz_by_jensen( - xyz, correct_CO=correctCO, embed_chiral=embed_chiral, **kwargs + xyz, embed_chiral=embed_chiral, **kwargs ) return cls(mol) @@ -690,7 +688,6 @@ def FromFile( path: str, backend: str = "openbabel", header: bool = True, - correctCO: bool = True, removeHs: bool = False, sanitize: bool = True, sameMol: bool = False, @@ -723,7 +720,6 @@ def FromFile( xyz, backend=backend, header=header, - correctCO=correctCO, sanitize=sanitize, **kwargs, ) diff --git a/rdmc/utils.py b/rdmc/utils.py index c0d684af..f7b0f456 100644 --- a/rdmc/utils.py +++ b/rdmc/utils.py @@ -56,11 +56,6 @@ "[!$(*#*)&!D1!H3]-&!@[!$(*#*)&!D1&!H3]" ) -# When perceiving molecules, openbabel will always perceive carbon monoxide as [C]=O -# Needs to correct it by [C-]#[O+] -CO_OPENBABEL_PATTERN = ob.OBSmartsPattern() -CO_OPENBABEL_PATTERN.Init("[Cv2X1]=[OX1]") - # Carbene, nitrene, and atomic oxygen templates. RDKit and Openbabel have difficulty # distinguish their multiplicity when input as SMILES or XYZ CARBENE_PATTERN = Chem.MolFromSmarts("[Cv0,Cv1,Cv2,Nv0,Nv1,Ov0]") @@ -457,38 +452,12 @@ def set_obmol_coords(obmol: ob.OBMol, coords: np.array): atom.SetVector(ob.vector3(*coords[atom_idx].tolist())) -def fix_CO_openbabel(obmol: "Openbabel.OBMol", correct_CO: bool = True): - """ - Fix the CO perception issue for openbabel molecule. - - Args: - obmol (Openbabel.OBMol): The Openbabel molecule instance. - correct_CO (bool, optional): Whether to fix this issue. Defaults to True. 
- """ - if not correct_CO: - return - CO_OPENBABEL_PATTERN.Match(obmol) - for pair in CO_OPENBABEL_PATTERN.GetUMapList(): - obmol.GetBond(*pair).SetBondOrder(3) - for idx in pair: - atom = obmol.GetAtom(idx) - if atom.GetAtomicNum() == 6: - atom.SetSpinMultiplicity(0) - atom.SetFormalCharge(-1) - elif atom.GetAtomicNum() == 8: - atom.SetSpinMultiplicity(0) - atom.SetFormalCharge(+1) - - -def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True): +def parse_xyz_by_openbabel(xyz: str): """ Perceive a xyz str using openbabel and generate the corresponding OBMol. Args: xyz (str): A str in xyz format containing atom positions. - correctCO (bool, optional): It is known that openbabel will parse carbon monoxide - as [C]=O instead of [C-]#[O+]. This function contains - a patch to correct that. Defaults to ``True``. Returns: ob.OBMol: An openbabel molecule from the xyz @@ -522,9 +491,6 @@ def parse_xyz_by_openbabel(xyz: str, correct_CO: bool = True): ): obatom.SetSpinMultiplicity(2) - # Correct [C]=O to [C-]#[O+] - fix_CO_openbabel(obmol, correct_CO=correct_CO) - return obmol @@ -620,7 +586,6 @@ def parse_xyz_by_jensen( allow_charged_fragments: bool = False, use_huckel: bool = False, embed_chiral: bool = True, - correct_CO: bool = True, use_atom_maps: bool = False, force_rdmc: bool = False, **kwargs, @@ -634,10 +599,6 @@ def parse_xyz_by_jensen( allow_charged_fragments: ``True`` for charged fragment, ``False`` for radical. Defaults to False. use_huckel: ``True`` to use extended Huckel bond orders to locate bonds. Defaults to False. embed_chiral: ``True`` to embed chiral information. Defaults to True. - correctCO (bool, optional): Defaults to ``True``. - In order to get correct RDKit molecule for carbon monoxide - ([C-]#[O+]), allow_charged_fragments should be forced to ``True``. - This function contains a patch to correct that. use_atom_maps(bool, optional): ``True`` to set atom map numbers to the molecule. Defaults to ``False``. 
force_rdmc (bool, optional): Defaults to ``False``. In rare case, we may hope to use a tailored version of the Jensen XYZ parser, other than the one available in RDKit. @@ -657,7 +618,6 @@ def parse_xyz_by_jensen( use_huckel=use_huckel, embed_chiral=embed_chiral, use_atom_maps=use_atom_maps, - correct_CO=correct_CO, ) # Version >= 2022.09.1 @@ -684,13 +644,6 @@ def parse_xyz_by_jensen( useHueckel=use_huckel, charge=charge, ) - # A force correction for CO - if ( - correct_CO - and mol.GetNumAtoms() == 2 - and {atom.GetAtomicNum() for atom in mol.GetAtoms()} == {6, 8} - ): - allow_charged_fragments = True rdDetermineBonds.DetermineBondOrders( mol, charge=charge, diff --git a/test/test_fix.py b/test/test_fix.py index b733b2a0..1593f53f 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -28,6 +28,7 @@ def test_fix_sanitize_ok(smi, exp_smi): @pytest.mark.parametrize( "smi, exp_smi", [ + ("[C]#[O]", "[C-]#[O+]"), ("[NH3][O]", "[NH3+][O-]"), ("[CH2][NH3]", "[CH2-][NH3+]"), ("[C]#[NH]", "[C-]#[NH+]"), diff --git a/test/test_utils.py b/test/test_utils.py index 011e76bc..5c7d7d43 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,10 +22,6 @@ @pytest.fixture(params=[ - ('[C-]#[O+]', """2 - -C 0.559061 0.000000 0.000000 -O -0.559061 0.000000 0.000000"""), ('C', """5 C 0.005119 -0.010620 0.006014