Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve how large molecules are automatically cut during mechanism generation #2660

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
146 changes: 98 additions & 48 deletions rmgpy/molecule/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
from rdkit import Chem

from numpy.random import randint
# this variable is used to name atom IDs so that there are as few conflicts by
# using the entire space of integer objects
ATOM_ID_COUNTER = -(2**15)
Expand Down Expand Up @@ -836,7 +836,7 @@ def from_rdkit_mol(self, rdkitmol, atom_replace_dict=None):

return self

def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=None):
def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=5):
"""
For given input, output a list of cut fragments (either string or Fragment).
if output_smiles = True, the output list of fragments will be smiles.
Expand Down Expand Up @@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
frag_list.append(res_frag)
return frag_list

def sliceitup_arom(self, molecule, size_threshold=None):
def sliceitup_arom(self, molecule, size_threshold=5):
"""
Several specified aromatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -950,27 +945,67 @@ def sliceitup_arom(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments

if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

# if frag 2 has the least Rs and frag 1 has the
# same or fewer Ls than frag 2 -->
# assign R to frag 2 and L to frag 1
if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")

# if frag 2 has the least Ls and frag 1 has the
# same or fewer Rs than frag 2 -->
# assign R to frag 1 and L to frag 2
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

# if frag 1 has the least Ls and frag 2 has the
# same or fewer Rs than frag 1 -->
# assign R to frag 2 and L to frag 1
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")

# if frag 1 has the least Rs and frag 2 has the
# same or fewer Ls than frag 1 -->
# assign R to frag 1 and L to frag 2
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

# else if frag 1 and frag 2 have equal number
# of Rs and Ls or one frag has more Rs and
# more Ls than the other, choose randomly
elif randint(0, 1) == 1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down Expand Up @@ -1014,15 +1049,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
frag_list_new.append(res_frag)
return frag_list_new

def sliceitup_aliph(self, molecule, size_threshold=None):
def sliceitup_aliph(self, molecule, size_threshold=5):
"""
Several specified aliphatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -1079,27 +1109,47 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down
Loading
Loading