Skip to content

Commit

Permalink
chemical_featurizer testing; fix other tests; fix use of original che…
Browse files Browse the repository at this point in the history
…mical ids in index of vector table
  • Loading branch information
Tom authored and Tom committed Aug 6, 2024
1 parent 3ce04a8 commit 910c787
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 128 deletions.
21 changes: 13 additions & 8 deletions comptox_ai/chemical_featurizer/generate_vectors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from comptox_ai.db.graph_db import GraphDB

from molfeat.trans.fp import FPVecTransformer
from rdkit import Chem, RDLogger
from rdkit.Chem import Descriptors
Expand Down Expand Up @@ -35,10 +34,16 @@ def sanitize_smiles(smiles_list):

print("Sanitizing sMILES")

return [
Chem.MolToSmiles(Chem.MolFromSmiles(smiles, sanitize=True))
for smiles in smiles_list
]
cleaned_smiles = []
for smiles in smiles_list:
try:
cleaned_smiles.append(
Chem.MolToSmiles(Chem.MolFromSmiles(smiles, sanitize=True))
)
except Exception as e:
raise ValueError(f"Invalid SMILES string: {smiles}. Error: {str(e)}")

return cleaned_smiles


def retrieve_smiles(
Expand All @@ -53,7 +58,7 @@ def retrieve_smiles(
A single chemical ID, list of chemicals IDs, or dictionary of {chemical_descriptor : list of chemical IDs} key-value pairs.
chemical_descriptor_type : str
Indicates the chemical descriptor type for chemicals_to_find if chemicals_to_find is str or List[str].
Valid chemical_descriptor types include commonName, Drugbank ID, MeSH ID, PubChem SID, PubChem CID, CasRN, sMILES, DTXSID.
Valid chemical_descriptor types include commonName, Drugbank ID (xrefDrugbank), MeSH ID (xrefMeSH), PubChem SID (xrefPubchemSID), PubChem CID (xrefPubchemCID), CasRN (xrefCasRN), sMILES, DTXSID (xrefDTXSID).
sanitize_smiles_flag : bool
Whether sanitize_smiles() should be run on the retrieved SMILES strings.
Expand Down Expand Up @@ -82,9 +87,9 @@ def retrieve_smiles(
list,
): # Return list of smiles directly no need to query database
if sanitize_smiles_flag:
return sanitize_smiles(chemicals_to_find)
return sanitize_smiles(chemicals_to_find), chemicals_to_find
else:
return chemicals_to_find
return chemicals_to_find, chemicals_to_find

if type(chemicals_to_find) in (str, list, dict):

Expand Down
Loading

0 comments on commit 910c787

Please sign in to comment.