-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #207 from hjkgrp/mol2D
Mol2D
- Loading branch information
Showing
15 changed files
with
592 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ dependencies: | |
- pandas | ||
- scipy | ||
- libffi | ||
- networkx >= 2.7 | ||
- importlib_resources | ||
- pip | ||
- pip: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ dependencies: | |
- pandas | ||
- scipy | ||
- libffi | ||
- networkx >= 2.7 | ||
- importlib_resources | ||
- pip | ||
- pip: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
import networkx as nx | ||
import numpy as np | ||
from packaging import version | ||
from molSimplify.Classes.globalvars import globalvars | ||
|
||
try: | ||
from openbabel import openbabel # version 3 style import | ||
except ImportError: | ||
import openbabel # fallback to version 2 | ||
|
||
|
||
class Mol2D(nx.Graph): | ||
|
||
def __repr__(self): | ||
atom_counts = {} | ||
for _, sym in self.nodes.data(data="symbol", default="X"): | ||
if sym not in atom_counts: | ||
atom_counts[sym] = 1 | ||
else: | ||
atom_counts[sym] += 1 | ||
|
||
symbols = globalvars().elementsbynum() | ||
formula = "" | ||
for sym in sorted(atom_counts.keys(), | ||
key=lambda x: symbols.index(x), reverse=True): | ||
formula += f"{sym}{atom_counts[sym]}" | ||
return f"Mol2D({formula})" | ||
|
||
@classmethod | ||
def from_smiles(cls, smiles): | ||
mol = cls() | ||
|
||
# Load using openbabel OBMol | ||
obConversion = openbabel.OBConversion() | ||
OBMol = openbabel.OBMol() | ||
obConversion.SetInFormat('smi') | ||
obConversion.ReadString(OBMol, smiles) | ||
OBMol.AddHydrogens() | ||
|
||
symbols = globalvars().elementsbynum() | ||
# Add atoms | ||
for i, atom in enumerate(openbabel.OBMolAtomIter(OBMol)): | ||
sym = symbols[atom.GetAtomicNum() - 1] | ||
mol.add_node(i, symbol=sym) | ||
|
||
# Add bonds | ||
for bond in openbabel.OBMolBondIter(OBMol): | ||
# Subtract 1 because of zero indexing vs. one indexing | ||
mol.add_edge(bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1) | ||
|
||
return mol | ||
|
||
@classmethod | ||
def from_mol2_file(cls, filename): | ||
mol = cls() | ||
|
||
with open(filename, "r") as fin: | ||
lines = fin.readlines() | ||
|
||
# Read counts line: | ||
sp = lines[2].split() | ||
n_atoms = int(sp[0]) | ||
n_bonds = int(sp[1]) | ||
|
||
atom_start = lines.index("@<TRIPOS>ATOM\n") + 1 | ||
for i, line in enumerate(lines[atom_start:atom_start + n_atoms]): | ||
sp = line.split() | ||
sym = sp[5].split(".")[0] | ||
mol.add_node(i, symbol=sym) | ||
|
||
bond_start = lines.index("@<TRIPOS>BOND\n") + 1 | ||
for line in lines[bond_start:bond_start + n_bonds]: | ||
sp = line.split() | ||
# Subtract 1 because of zero indexing vs. one indexing | ||
mol.add_edge(int(sp[1]) - 1, int(sp[2]) - 1) | ||
|
||
return mol | ||
|
||
@classmethod | ||
def from_mol_file(cls, filename): | ||
mol = cls() | ||
|
||
with open(filename, "r") as fin: | ||
lines = fin.readlines() | ||
|
||
# Read counts line: | ||
sp = lines[3].split() | ||
n_atoms = int(sp[0]) | ||
n_bonds = int(sp[1]) | ||
|
||
# Add atoms (offset of 4 for the header lines): | ||
for i, line in enumerate(lines[4:4 + n_atoms]): | ||
sp = line.split() | ||
mol.add_node(i, symbol=sp[3]) | ||
|
||
# Add bonds: | ||
for line in lines[4 + n_atoms:4 + n_atoms + n_bonds]: | ||
sp = line.split() | ||
# Subtract 1 because of zero indexing vs. one indexing | ||
mol.add_edge(int(sp[0]) - 1, int(sp[1]) - 1) | ||
|
||
return mol | ||
|
||
def graph_hash(self): | ||
# This is necessary because networkx < 2.7 had a bug in the implementation | ||
# of weisfeiler_lehman_graph_hash | ||
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654 | ||
assert version.parse(nx.__version__) >= version.parse("2.7") | ||
return nx.weisfeiler_lehman_graph_hash(self, node_attr="symbol") | ||
|
||
def graph_hash_edge_attr(self): | ||
# This is necessary because networkx < 2.7 had a bug in the implementation | ||
# of weisfeiler_lehman_graph_hash | ||
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654 | ||
assert version.parse(nx.__version__) >= version.parse("2.7") | ||
# Copy orginal graph before adding edge attributes | ||
G = self.copy() | ||
|
||
for i, j in G.edges: | ||
G.edges[i, j]["label"] = "".join(sorted([G.nodes[i]["symbol"], | ||
G.nodes[j]["symbol"]])) | ||
|
||
return nx.weisfeiler_lehman_graph_hash(G, edge_attr="label") | ||
|
||
def graph_determinant(self, return_string=True): | ||
atomic_masses = globalvars().amass() | ||
weights = np.array( | ||
[ | ||
atomic_masses[atom[1]][0] | ||
for atom in self.nodes(data="symbol", default="X") | ||
] | ||
) | ||
adjacency = nx.to_numpy_array(self) | ||
mat = np.outer(weights, weights) * adjacency | ||
np.fill_diagonal(mat, weights) | ||
with np.errstate(over='raise'): | ||
try: | ||
det = np.linalg.det(mat) | ||
except (np.linalg.LinAlgError, FloatingPointError): | ||
(sign, det) = np.linalg.slogdet(mat) | ||
if sign != 0: | ||
det = sign*det | ||
if return_string: | ||
det = str(det) | ||
if "e+" in det: | ||
sp = det.split("e+") | ||
det = sp[0][0:12] + "e+" + sp[1] | ||
else: | ||
det = det[0:12] | ||
return det |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,33 @@ | ||
|
||
OpenBabel11081614413D | ||
OpenBabel03142421563D | ||
|
||
14 13 0 0 0 0 0 0 0 0999 V2000 | ||
3.1117 0.8158 -1.3940 O 0 5 0 0 0 0 0 0 0 0 0 0 | ||
2.5035 0.2178 -0.5193 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
1.0119 0.1135 -0.5927 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
3.2352 -0.4172 0.6354 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
4.7377 -0.3230 0.7389 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
5.3853 0.2682 -0.1114 O 0 0 0 0 0 0 0 0 0 0 0 0 | ||
5.4522 -0.9568 1.8929 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.6247 0.6297 -1.4972 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.5611 0.5821 0.3070 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.7146 -0.9552 -0.6360 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
4.7384 -1.4561 2.5816 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
6.0070 -0.1796 2.4590 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
6.1702 -1.7146 1.5151 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
2.7035 -0.9212 1.3622 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
3.1144 0.8372 -1.4020 O 0 0 0 0 0 0 0 0 0 0 0 0 | ||
2.5505 0.2219 -0.5095 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
1.0576 0.0909 -0.5564 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
3.3222 -0.3885 0.6057 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
4.6639 -0.3188 0.7249 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
5.4090 0.3313 -0.1923 O 0 5 0 0 0 0 0 0 0 0 0 0 | ||
5.3707 -0.9629 1.8831 C 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.5953 1.1001 -0.5394 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.6737 -0.4886 0.3095 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
0.7595 -0.4299 -1.4905 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
2.7584 -0.9218 1.3662 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
4.6658 -1.4697 2.5767 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
5.9265 -0.1881 2.4512 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
6.0904 -1.7166 1.5010 H 0 0 0 0 0 0 0 0 0 0 0 0 | ||
1 2 2 0 0 0 0 | ||
2 3 1 0 0 0 0 | ||
2 4 1 0 0 0 0 | ||
3 8 1 0 0 0 0 | ||
3 9 1 0 0 0 0 | ||
3 10 1 0 0 0 0 | ||
4 5 2 0 0 0 0 | ||
4 14 1 0 0 0 0 | ||
4 11 1 0 0 0 0 | ||
5 6 1 0 0 0 0 | ||
5 7 1 0 0 0 0 | ||
7 11 1 0 0 0 0 | ||
7 12 1 0 0 0 0 | ||
7 13 1 0 0 0 0 | ||
M CHG 1 5 -1 | ||
7 14 1 0 0 0 0 | ||
M CHG 1 6 -1 | ||
M END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import pytest | ||
from molSimplify.Classes.mol2D import Mol2D | ||
|
||
|
||
def water_Mol2D(): | ||
mol = Mol2D() | ||
mol.add_nodes_from( | ||
[(0, {"symbol": "O"}), (1, {"symbol": "H"}), (2, {"symbol": "H"})] | ||
) | ||
mol.add_edges_from([(0, 1), (0, 2)]) | ||
return mol | ||
|
||
|
||
def furan_Mol2D(): | ||
mol = Mol2D() | ||
mol.add_nodes_from( | ||
[ | ||
(0, {"symbol": "O"}), | ||
(1, {"symbol": "C"}), | ||
(2, {"symbol": "C"}), | ||
(3, {"symbol": "C"}), | ||
(4, {"symbol": "C"}), | ||
(5, {"symbol": "H"}), | ||
(6, {"symbol": "H"}), | ||
(7, {"symbol": "H"}), | ||
(8, {"symbol": "H"}), | ||
] | ||
) | ||
mol.add_edges_from( | ||
[(0, 1), (1, 2), (2, 3), (3, 4), (0, 4), (1, 5), (2, 6), (3, 7), (4, 8)] | ||
) | ||
return mol | ||
|
||
|
||
def acac_Mol2D(): | ||
mol = Mol2D() | ||
mol.add_nodes_from( | ||
[ | ||
(0, {"symbol": "O"}), | ||
(1, {"symbol": "C"}), | ||
(2, {"symbol": "C"}), | ||
(3, {"symbol": "C"}), | ||
(4, {"symbol": "C"}), | ||
(5, {"symbol": "O"}), | ||
(6, {"symbol": "C"}), | ||
(7, {"symbol": "H"}), | ||
(8, {"symbol": "H"}), | ||
(9, {"symbol": "H"}), | ||
(10, {"symbol": "H"}), | ||
(11, {"symbol": "H"}), | ||
(12, {"symbol": "H"}), | ||
(13, {"symbol": "H"}), | ||
] | ||
) | ||
mol.add_edges_from( | ||
[(0, 1), (1, 2), (1, 3), (2, 7), (2, 8), (2, 9), (3, 4), | ||
(3, 10), (4, 5), (4, 6), (6, 11), (6, 12), (6, 13)] | ||
) | ||
return mol | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"mol, ref", | ||
[ | ||
(water_Mol2D(), "Mol2D(O1H2)"), | ||
(furan_Mol2D(), "Mol2D(O1C4H4)"), | ||
(acac_Mol2D(), "Mol2D(O2C5H7)"), | ||
]) | ||
def test_Mol2D_repr(mol, ref): | ||
assert mol.__repr__() == ref | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"name, smiles, mol_ref", | ||
[ | ||
("water", "O", water_Mol2D()), | ||
("furan", "o1cccc1", furan_Mol2D()), | ||
("acac", "[O-]-C(C)=CC(=O)C", acac_Mol2D()), | ||
] | ||
) | ||
def test_Mol2D_constructors(resource_path_root, name, smiles, mol_ref): | ||
# From mol file | ||
mol = Mol2D.from_mol_file(resource_path_root / "inputs" / "io" / f"{name}.mol") | ||
|
||
assert mol.nodes == mol_ref.nodes | ||
assert mol.edges == mol_ref.edges | ||
|
||
# From mol2 file | ||
mol = Mol2D.from_mol2_file(resource_path_root / "inputs" / "io" / f"{name}.mol2") | ||
|
||
assert mol.nodes == mol_ref.nodes | ||
assert mol.edges == mol_ref.edges | ||
|
||
# From smiles | ||
mol = Mol2D.from_smiles(smiles) | ||
|
||
assert mol.nodes == mol_ref.nodes | ||
assert mol.edges == mol_ref.edges | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"filename, node_hash_ref, edge_hash_ref, graph_det_ref", | ||
[ | ||
("water.mol2", "2c96eb4288d63b2a9437ddd4172e67f3", "e5206dd18f7e829cf77fae1a48e7b0b9", "-507.9380086"), | ||
("formaldehyde.mol2", "e4af80ed7395aef9001d1b170c5c0ec6", "df21357bb47fe3aa2e062c7e3a3b573e", "-42043.85450"), | ||
("furan.mol2", "f6090276d7369c0c0a535113ec1d97cf", "a9b6fbc7b5f53613546d5e91a7544ed6", "-19404698740"), | ||
("acac.mol2", "0dfc6836e022406f1be7b5627451d590", "d4879258a3a6c832b55b2e9d7387f96d", "-2.822252628e+16"), | ||
("ADUYUV.mol2", "e134585068e342d12364850112ec7609", "9bc68a69bfeceabffe878084da72f542", "2.7503745443e+80"), | ||
] | ||
) | ||
def test_Mol2D_graph_hash(resource_path_root, filename, node_hash_ref, edge_hash_ref, graph_det_ref): | ||
# From mol file | ||
mol = Mol2D.from_mol2_file(resource_path_root / "inputs" / "io" / filename) | ||
|
||
node_hash = mol.graph_hash() | ||
edge_hash = mol.graph_hash_edge_attr() | ||
graph_det = mol.graph_determinant() | ||
|
||
assert node_hash == node_hash_ref | ||
assert edge_hash == edge_hash_ref | ||
assert graph_det == graph_det_ref |
Oops, something went wrong.