-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from halx/main
Added descriptors
- Loading branch information
Showing
12 changed files
with
956 additions
and
16 deletions.
There are no files selected for viewing
Empty file.
104 changes: 104 additions & 0 deletions
104
contrib/reinvent_plugins/components/RDKit_extra/comp_pharm2d.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
"""RDKit 2D Pharmacophore Fingerprints""" | ||
|
||
from __future__ import annotations | ||
|
||
__all__ = ["Pharm2DFP"] | ||
|
||
import os | ||
from dataclasses import dataclass | ||
from typing import List | ||
import logging | ||
|
||
from rdkit import Chem, DataStructs | ||
from rdkit.Chem import ChemicalFeatures | ||
from rdkit.Chem.Pharm2D import Generate, Gobbi_Pharm2D | ||
from rdkit.Chem.Pharm2D.SigFactory import SigFactory | ||
import numpy as np | ||
|
||
from ..component_results import ComponentResults | ||
from reinvent_plugins.mol_cache import molcache | ||
from ..add_tag import add_tag | ||
|
||
logger = logging.getLogger("reinvent") | ||
|
||
FEATURE_DIR = os.path.join(os.path.dirname(__file__), "features") | ||
|
||
|
||
@add_tag("__parameters") | ||
@dataclass | ||
class Parameters: | ||
ref_smiles: List[str] | ||
feature_definition: List[str] # base, minimal, gobbi | ||
bins: List[List[int]] | ||
min_point_count: List[int] | ||
max_point_count: List[int] | ||
similarity: List[str] | ||
similarity_params: List[dict] | ||
|
||
|
||
@add_tag("__component") | ||
class Pharm2DFP: | ||
def __init__(self, params: Parameters): | ||
self.ref_fps = [] | ||
self.signature_factories = [] | ||
self.similarities = [] | ||
self.similarities_params = [] | ||
|
||
for smiles, fdef, bins, minp, maxp, sim, sim_params in zip( | ||
params.ref_smiles, | ||
params.feature_definition, | ||
params.bins, | ||
params.min_point_count, | ||
params.max_point_count, | ||
params.similarity, | ||
params.similarity_params, | ||
): | ||
fdef_name = fdef.capitalize() | ||
|
||
if fdef_name == "Gobbi": | ||
signature_factory = Gobbi_Pharm2D.factory | ||
else: | ||
fdef_filename = os.path.join(FEATURE_DIR, f"{fdef_name}Features.fdef") | ||
|
||
feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_filename) | ||
signature_factory = SigFactory( | ||
feature_factory, minPointCount=minp, maxPointCount=maxp | ||
) | ||
|
||
b = iter(bins) | ||
signature_factory.SetBins(list(zip(b, b))) | ||
signature_factory.Init() | ||
|
||
self.signature_factories.append(signature_factory) | ||
|
||
mol = Chem.MolFromSmiles(smiles) | ||
|
||
if not mol: | ||
raise RuntimeError(f"{__name__}: invalid SMILES {smiles}") | ||
|
||
fp = Generate.Gen2DFingerprint(mol, signature_factory) # replace | ||
self.ref_fps.append(fp) | ||
|
||
sim_name = sim.capitalize() | ||
|
||
try: | ||
self.similarities.append(getattr(DataStructs, f"Bulk{sim_name}Similarity")) | ||
self.similarities_params.append(sim_params) | ||
except: | ||
raise RuntimeError(f"{__name__}: {sim_name} similarity not supported by RDKit") | ||
|
||
@molcache | ||
def __call__(self, mols: List[Chem.Mol]) -> np.array: | ||
scores = [] | ||
|
||
for ref_fp, signature_factory, similarity, sim_params in zip( | ||
self.ref_fps, self.signature_factories, self.similarities, self.similarities_params | ||
): | ||
target_fps = [] | ||
|
||
for mol in mols: | ||
target_fps.append(Generate.Gen2DFingerprint(mol, signature_factory)) | ||
|
||
scores.append(np.array(similarity(ref_fp, target_fps, **sim_params), dtype=float)) | ||
|
||
return ComponentResults(scores) |
41 changes: 41 additions & 0 deletions
41
contrib/reinvent_plugins/components/RDKit_extra/comp_rdkit_descriptors.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
"""Compute a desired list of RDKit descriptors up to a total of 210""" | ||
|
||
__all__ = ["RDKitDescriptors"] | ||
from dataclasses import dataclass | ||
from typing import List | ||
import logging | ||
|
||
from rdkit import Chem | ||
from rdkit.ML.Descriptors.MoleculeDescriptors import MolecularDescriptorCalculator | ||
import numpy as np | ||
|
||
from ..component_results import ComponentResults | ||
from reinvent_plugins.mol_cache import molcache | ||
from ..add_tag import add_tag | ||
|
||
logger = logging.getLogger("reinvent") | ||
|
||
|
||
@add_tag("__parameters") | ||
@dataclass | ||
class Parameters: | ||
descriptor: List[str] | ||
|
||
|
||
@add_tag("__component") | ||
class RDKitDescriptors: | ||
def __init__(self, params: Parameters): | ||
# collect descriptor from all endpoints: only one descriptor per endpoint | ||
self.calc = MolecularDescriptorCalculator(params.descriptor).CalcDescriptors | ||
|
||
@molcache | ||
def __call__(self, mols: List[Chem.Mol]) -> ComponentResults: | ||
scores = [] | ||
|
||
for mol in mols: | ||
result = self.calc(mol, missingVal=np.NaN) | ||
scores.append(np.array(result)) | ||
|
||
scores = np.array(scores).transpose() | ||
|
||
return ComponentResults(list(scores)) |
219 changes: 219 additions & 0 deletions
219
contrib/reinvent_plugins/components/RDKit_extra/features/BaseFeatures.fdef
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
# $Id$ | ||
# | ||
# RDKit base fdef file. | ||
# Created by Greg Landrum | ||
# | ||
|
||
AtomType NDonor [N&!H0&v3,N&!H0&+1&v4,n&H1&+0] | ||
AtomType AmideN [$(N-C(=O))] | ||
AtomType SulfonamideN [$([N;H0]S(=O)(=O))] | ||
AtomType NDonor [$([Nv3](-C)(-C)-C)] | ||
|
||
AtomType NDonor [$(n[n;H1]),$(nc[n;H1])] | ||
|
||
AtomType ChalcDonor [O,S;H1;+0] | ||
DefineFeature SingleAtomDonor [{NDonor},{ChalcDonor}] | ||
Family Donor | ||
Weights 1 | ||
EndFeature | ||
|
||
# aromatic N, but not indole or pyrole or fusing two rings | ||
AtomType NAcceptor [n;+0;!X3;!$([n;H1](cc)cc)] | ||
AtomType NAcceptor [$([N;H0]#[C&v4])] | ||
# tertiary nitrogen adjacent to aromatic carbon | ||
AtomType NAcceptor [N&v3;H0;$(Nc)] | ||
|
||
# removes thioether and nitro oxygen | ||
AtomType ChalcAcceptor [O;H0;v2;!$(O=N-*)] | ||
Atomtype ChalcAcceptor [O;-;!$(*-N=O)] | ||
|
||
# Removed aromatic sulfur from ChalcAcceptor definition | ||
Atomtype ChalcAcceptor [o;+0] | ||
|
||
# Hydroxyls and acids | ||
AtomType Hydroxyl [O;H1;v2] | ||
|
||
# F is an acceptor so long as the C has no other halogen neighbors. This is maybe | ||
# a bit too general, but the idea is to eliminate things like CF3 | ||
AtomType HalogenAcceptor [F;$(F-[#6]);!$(FC[F,Cl,Br,I])] | ||
|
||
DefineFeature SingleAtomAcceptor [{Hydroxyl},{ChalcAcceptor},{NAcceptor},{HalogenAcceptor}] | ||
Family Acceptor | ||
Weights 1 | ||
EndFeature | ||
|
||
# this one is delightfully easy: | ||
DefineFeature AcidicGroup [C,S](=[O,S,P])-[O;H1,H0&-1] | ||
Family NegIonizable | ||
Weights 1.0,1.0,1.0 | ||
EndFeature | ||
|
||
AtomType Carbon_NotDouble [C;!$(C=*)] | ||
AtomType BasicNH2 [$([N;H2&+0][{Carbon_NotDouble}])] | ||
AtomType BasicNH1 [$([N;H1&+0]([{Carbon_NotDouble}])[{Carbon_NotDouble}])] | ||
AtomType PosNH3 [$([N;H3&+1][{Carbon_NotDouble}])] | ||
AtomType PosNH2 [$([N;H2&+1]([{Carbon_NotDouble}])[{Carbon_NotDouble}])] | ||
AtomType PosNH1 [$([N;H1&+1]([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])] | ||
AtomType BasicNH0 [$([N;H0&+0]([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])] | ||
AtomType QuatN [$([N;H0&+1]([{Carbon_NotDouble}])([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])] | ||
|
||
|
||
DefineFeature BasicGroup [{BasicNH2},{BasicNH1},{BasicNH0};!$(N[a])] | ||
Family PosIonizable | ||
Weights 1.0 | ||
EndFeature | ||
|
||
# 14.11.2007 (GL): add !$([N+]-[O-]) constraint so we don't match | ||
# nitro (or similar) groups | ||
DefineFeature PosN [#7;+;!$([N+]-[O-])] | ||
Family PosIonizable | ||
Weights 1.0 | ||
EndFeature | ||
|
||
# imidazole group can be positively charged (too promiscuous?) | ||
DefineFeature Imidazole c1ncnc1 | ||
Family PosIonizable | ||
Weights 1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
# guanidine group is positively charged (too promiscuous?) | ||
DefineFeature Guanidine NC(=N)N | ||
Family PosIonizable | ||
Weights 1.0,1.0,1.0,1.0 | ||
EndFeature | ||
|
||
# the LigZn binder features were adapted from combichem.fdl | ||
DefineFeature ZnBinder1 [S;D1]-[#6] | ||
Family ZnBinder | ||
Weights 1,0 | ||
EndFeature | ||
DefineFeature ZnBinder2 [#6]-C(=O)-C-[S;D1] | ||
Family ZnBinder | ||
Weights 0,0,1,0,1 | ||
EndFeature | ||
DefineFeature ZnBinder3 [#6]-C(=O)-C-C-[S;D1] | ||
Family ZnBinder | ||
Weights 0,0,1,0,0,1 | ||
EndFeature | ||
|
||
DefineFeature ZnBinder4 [#6]-C(=O)-N-[O;D1] | ||
Family ZnBinder | ||
Weights 0,0,1,0,1 | ||
EndFeature | ||
DefineFeature ZnBinder5 [#6]-C(=O)-[O;D1] | ||
Family ZnBinder | ||
Weights 0,0,1,1 | ||
EndFeature | ||
DefineFeature ZnBinder6 [#6]-P(=O)(-O)-[C,O,N]-[C,H] | ||
Family ZnBinder | ||
Weights 0,0,1,1,0,0 | ||
EndFeature | ||
|
||
|
||
# aromatic rings of various sizes: | ||
# | ||
# Note that with the aromatics, it's important to include the ring-size queries along with | ||
# the aromaticity query for two reasons: | ||
# 1) Much of the current feature-location code assumes that the feature point is | ||
# equidistant from the atoms defining it. Larger definitions like: a1aaaaaaaa1 will actually | ||
# match things like 'o1c2cccc2ccc1', which have an aromatic unit spread across multiple simple | ||
# rings and so don't fit that requirement. | ||
# 2) It's *way* faster. | ||
# | ||
|
||
# | ||
# 21.1.2008 (GL): update ring membership tests to reflect corrected meaning of | ||
# "r" in SMARTS parser | ||
# | ||
AtomType AromR4 [a;r4,!R1&r3] | ||
DefineFeature Arom4 [{AromR4}]1:[{AromR4}]:[{AromR4}]:[{AromR4}]:1 | ||
Family Aromatic | ||
Weights 1.0,1.0,1.0,1.0 | ||
EndFeature | ||
AtomType AromR5 [a;r5,!R1&r4,!R1&r3] | ||
DefineFeature Arom5 [{AromR5}]1:[{AromR5}]:[{AromR5}]:[{AromR5}]:[{AromR5}]:1 | ||
Family Aromatic | ||
Weights 1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
AtomType AromR6 [a;r6,!R1&r5,!R1&r4,!R1&r3] | ||
DefineFeature Arom6 [{AromR6}]1:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:1 | ||
Family Aromatic | ||
Weights 1.0,1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
AtomType AromR7 [a;r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3] | ||
DefineFeature Arom7 [{AromR7}]1:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:1 | ||
Family Aromatic | ||
Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
AtomType AromR8 [a;r8,!R1&r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3] | ||
DefineFeature Arom8 [{AromR8}]1:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:1 | ||
Family Aromatic | ||
Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
|
||
# hydrophobic features | ||
# any carbon that is not bonded to a polar atom is considered a hydrophobe | ||
# | ||
# 23.11.2007 (GL): match any bond (not just single bonds); add #6 at | ||
# beginning to make it more efficient | ||
AtomType Carbon_Polar [#6;$([#6]~[#7,#8,#9])] | ||
# 23.11.2007 (GL): don't match charged carbon | ||
AtomType Carbon_NonPolar [#6;+0;!{Carbon_Polar}] | ||
|
||
DefineFeature ThreeWayAttach [D3,D4;{Carbon_NonPolar}] | ||
Family Hydrophobe | ||
Weights 1.0 | ||
EndFeature | ||
|
||
DefineFeature ChainTwoWayAttach [R0;D2;{Carbon_NonPolar}] | ||
Family Hydrophobe | ||
Weights 1.0 | ||
EndFeature | ||
|
||
# hydrophobic atom | ||
AtomType Hphobe [c,s,S&H0&v2,Br,I,{Carbon_NonPolar}] | ||
AtomType RingHphobe [R;{Hphobe}] | ||
|
||
# nitro groups in the RD code are always: *-[N+](=O)[O-] | ||
DefineFeature Nitro2 [N;D3;+](=O)[O-] | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0 | ||
EndFeature | ||
|
||
# | ||
# 21.1.2008 (GL): update ring membership tests to reflect corrected meaning of | ||
# "r" in SMARTS parser | ||
# | ||
AtomType Ring6 [r6,!R1&r5,!R1&r4,!R1&r3] | ||
DefineFeature RH6_6 [{Ring6};{RingHphobe}]1[{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}][{Ring6};{RingHphobe}]1 | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
|
||
AtomType Ring5 [r5,!R1&r4,!R1&r3] | ||
DefineFeature RH5_5 [{Ring5};{RingHphobe}]1[{Ring5};{RingHphobe}][{Ring5};{RingHphobe}][{Ring5};{RingHphobe}][{Ring5};{RingHphobe}]1 | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0,1.0,1.0 | ||
EndFeature | ||
|
||
AtomType Ring4 [r4,!R1&r3] | ||
DefineFeature RH4_4 [{Ring4};{RingHphobe}]1[{Ring4};{RingHphobe}][{Ring4};{RingHphobe}][{Ring4};{RingHphobe}]1 | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0,1.0 | ||
EndFeature | ||
|
||
AtomType Ring3 [r3] | ||
DefineFeature RH3_3 [{Ring3};{RingHphobe}]1[{Ring3};{RingHphobe}][{Ring3};{RingHphobe}]1 | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0 | ||
EndFeature | ||
|
||
DefineFeature tButyl [C;!R](-[CH3])(-[CH3])-[CH3] | ||
Family LumpedHydrophobe | ||
Weights 1.0,0.0,0.0,0.0 | ||
EndFeature | ||
|
||
DefineFeature iPropyl [CH;!R](-[CH3])-[CH3] | ||
Family LumpedHydrophobe | ||
Weights 1.0,1.0,1.0 | ||
EndFeature | ||
|
Oops, something went wrong.