Skip to content

Commit

Permalink
Merge branch 'release/v0.4.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
philopon committed Aug 16, 2017
2 parents b52520a + 7fb3b2e commit bf6c8a9
Show file tree
Hide file tree
Showing 14 changed files with 296 additions and 56 deletions.
58 changes: 58 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,64 @@ examples
as command
~~~~~~~~~~

calculate all descriptors

.. code:: console
$ python -m mordred example.smi
name,ECIndex,WPath,WPol,Zagreb1, (snip)
benzene,36,27,3,24.0, (snip)
chlorobenzene,45,42,5,30.0, (snip)
save to file (display progress bar)

.. code:: console
$ python -m mordred example.smi -o example.csv
50%|███████████████████████████████████████▌ | 1/2 [00:00<00:00, 7.66it/s]
stream read (low memory; the total number of molecules is not known in advance)

.. code:: console
$ python -m mordred example.smi -s -o example.csv
0it [00:00, ?it/s]
only ABCIndex

.. code:: console
$ python -m mordred example.smi -d ABCIndex
name,ABC,ABCGG
benzene,4.242640687119286,3.9999999999999996
chlorobenzene,5.059137268047012,4.785854275382693
ABCIndex and AcidBase

.. code:: console
$ python -m mordred example.smi -d ABCIndex -d AcidBase
name,ABC,ABCGG,nAcid,nBase
benzene,4.242640687119286,3.9999999999999996,0,0
chlorobenzene,5.059137268047012,4.785854275382693,0,0
multiple input

.. code:: console
$ python -m mordred example.smi example2.smi -d ABCIndex
name,ABC,ABCGG
benzene,4.242640687119286,3.9999999999999996
chlorobenzene,5.059137268047012,4.785854275382693
pentane,2.8284271247461903,3.1462643699419726
show help

.. code:: console
$ python -m mordred --help
usage: python -m mordred [-h] [--version] [-t {auto,sdf,mol,smi}] [-o OUTPUT]
[-p PROCESSES] [-q] [-s] [-d DESC] [-3] [-v]
INPUT [INPUT ...]
Expand Down Expand Up @@ -121,6 +177,8 @@ as library
1 1.3922
2 1.2688
Name: SLogP, dtype: float64
see `examples <examples>`_

Documentation
-------------
Expand Down
3 changes: 3 additions & 0 deletions docs/mordred.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ mordred package

.. automodule:: mordred

.. autoclass:: mordred.Result
:members:

.. autoclass:: mordred.Descriptor
:members:

Expand Down
20 changes: 20 additions & 0 deletions examples/010-single_mol-single_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Example: calculate one descriptor at a time for a single molecule.
from rdkit import Chem
from mordred import Chi, ABCIndex

# molecule used throughout this example (benzene, given as SMILES)
benzene = Chem.MolFromSmiles('c1ccccc1')

# create a descriptor instance (this descriptor takes no parameters)
abci = ABCIndex.ABCIndex()

# calculate the descriptor value by calling the instance with the molecule
result = abci(benzene)

# show the descriptor's string form next to the calculated value
print(str(abci), result)

# create a descriptor instance with parameters
chi_pc4 = Chi.Chi(type='path_cluster', order=4)

# calculate the parameterized descriptor for the same molecule
# (note: `result` is rebound here; the previous value was already printed)
result = chi_pc4(benzene)

print(str(chi_pc4), result)
53 changes: 53 additions & 0 deletions examples/020-single_mol-multiple_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Example: calculate several descriptors for a single molecule with a
# Calculator, then inspect and filter missing values in the result.
from rdkit import Chem
from mordred import Chi, ABCIndex, RingCount, Calculator, is_missing, descriptors

# molecule used throughout this example (benzene, given as SMILES)
benzene = Chem.MolFromSmiles('c1ccccc1')

# Create an empty Calculator instance
calc1 = Calculator()

# Register a single descriptor instance
calc1.register(Chi.Chi(type='path_cluster', order=4))

# Register a descriptor class (instances are created from its presets)
calc1.register(RingCount.RingCount)

# Register all descriptors found in a module
calc1.register(ABCIndex)


# Calculate every registered descriptor for the molecule
result = calc1(benzene)

print(result)
# >>> [0.0, 1, 0, 0, 0, 1, (snip)


# The Calculator constructor can register descriptors directly
calc2 = Calculator(Chi.Chi)

# The `descriptors` module contains all descriptors
calc3 = Calculator(descriptors)

# All registered descriptor instances are available through the
# `descriptors` property
print(calc3.descriptors)
# >>> (mordred.EccentricConnectivityIndex.EccentricConnectivityIndex(), (snip)


# Calculate descriptors
result = calc3(benzene)

# Get the first missing value in the result.
# NOTE: next() raises StopIteration if no value is missing; this example
# relies on at least one descriptor failing (see the expected output below).
na1 = next(r for r in result if is_missing(r))

# Print the reason the value is missing
print(na1.error)
# >>> missing 3D coordinate


# Drop all missing values from the result
result = result.dropna()


# Convert the remaining values to a dict and print it
print(result.asdict())
17 changes: 17 additions & 0 deletions examples/030-multiple_mol-multiple_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Example: calculate all descriptors for several molecules at once.
from rdkit import Chem
from mordred import Calculator, descriptors

# input molecules, each built from a SMILES string
mols = [
Chem.MolFromSmiles('c1ccccc1'),
Chem.MolFromSmiles('c1ccccc1Cl'),
Chem.MolFromSmiles('c1ccccc1C'),
]

# Create a Calculator with every descriptor in the `descriptors` module
calc = Calculator(descriptors)

# `map` calculates descriptors for multiple molecules; per the original
# author's note it returns a generator, so list() is used to materialize
# the results before printing
print(list(calc.map(mols)))

# `pandas` calculates descriptors for multiple molecules and returns a
# pandas DataFrame
print(calc.pandas(mols))
16 changes: 16 additions & 0 deletions examples/110-descriptor-arithmetic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Example: combine descriptors with arithmetic operators to build a new,
# derived descriptor.
from rdkit import Chem
from mordred import Chi, ABCIndex

# molecule used throughout this example (benzene, given as SMILES)
benzene = Chem.MolFromSmiles('c1ccccc1')

# create the two descriptor instances to combine
abci = ABCIndex.ABCIndex()
chi_p2 = Chi.Chi(type='path', order=2)

# create a product term using descriptor arithmetic: multiplying two
# descriptor instances yields an object that is itself callable on a molecule
abci_x_chi_p2 = abci * chi_p2

# calculate the value of the combined descriptor
result = abci_x_chi_p2(benzene)

# print the combined descriptor followed by its value
print(abci_x_chi_p2, result)
2 changes: 2 additions & 0 deletions mordred/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Descriptor,
get_descriptors_from_module,
is_missing,
Result,
)

from ._version import __version__
Expand All @@ -17,4 +18,5 @@
"all_descriptors",
"get_descriptors_from_module",
"is_missing",
"Result",
)
43 changes: 3 additions & 40 deletions mordred/_base/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
"""Mordred base package."""

import os
import warnings

from importlib import import_module
from ..error import MissingValueBase

from .descriptor import (
Expand All @@ -12,6 +8,8 @@
)
from .calculator import Calculator, get_descriptors_from_module
from .parallel import parallel
from .util import is_missing, all_descriptors
from .result import Result


__all__ = (
Expand All @@ -20,32 +18,10 @@
"Calculator",
"get_descriptors_from_module",
"is_missing",
"Result",
)


def all_descriptors():
    r"""**[deprecated]** use mordred.descriptors module instead.

    Yield every descriptor module found next to this package.

    The directory two levels above this file is scanned; each ``*.py`` file
    whose name does not start with ``_`` and is not ``descriptors`` is
    imported relative to ``__package__`` and yielded.

    Because this is a generator, the :py:class:`DeprecationWarning` is only
    emitted once iteration actually starts.

    :returns: all descriptor modules
    :rtype: :py:class:`Iterator` of modules
    """
    warnings.warn(
        "all_descriptors() is deprecated, use mordred.descriptors module instead",
        DeprecationWarning,
        stacklevel=2,
    )

    package_root = os.path.dirname(os.path.dirname(__file__))

    for entry in os.listdir(package_root):
        stem, suffix = os.path.splitext(entry)

        # keep only public python source files, excluding the aggregate
        # ``descriptors`` module itself
        is_descriptor_module = (
            suffix == ".py"
            and not stem.startswith("_")
            and stem != "descriptors"
        )

        if is_descriptor_module:
            yield import_module(".." + stem, __package__)


def _Descriptor__call__(self, mol, id=-1):
r"""Calculate single descriptor value.
Expand Down Expand Up @@ -116,19 +92,6 @@ def _Descriptor_from_json(self, obj):
return _from_json(obj, descs)


def is_missing(v):
    """Tell whether a value is a missing-value marker.

    Parameters:
        v(any): value to inspect

    Returns:
        bool: True when ``v`` is an instance of ``MissingValueBase``

    """
    return isinstance(v, MissingValueBase)


Descriptor.__call__ = _Descriptor__call__
Descriptor.from_json = _Descriptor_from_json
Calculator._parallel = parallel
30 changes: 17 additions & 13 deletions mordred/_base/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import sys
from types import ModuleType
from inspect import getsourcelines
from contextlib import contextmanager

from tqdm import tqdm

from .._util import Capture, DummyBar, NotebookWrapper
from ..error import Error, Missing, MultipleFragments, DuplicatedDescriptorName
from .result import Result
from .context import Context
from .descriptor import Descriptor, MissingValueException, is_descriptor_class

Expand Down Expand Up @@ -235,16 +235,21 @@ def __call__(self, mol, id=-1):
:type id: int
:param id: conformer id
:rtype: [scalar or Error]
:rtype: Result[scalar or Error]
:returns: iterator of descriptor and value
"""
return list(self._calculate(Context.from_calculator(self, mol, id)))
return self._wrap_result(
self._calculate(Context.from_calculator(self, mol, id)),
)

def _wrap_result(self, r):
    """Wrap raw calculation output in a ``Result``.

    Parameters:
        r: raw output of a calculation (presumably an iterable of values,
            one per registered descriptor; confirm against ``Result``)

    Returns:
        Result: ``r`` paired with this calculator's ``_descriptors``

    """
    registered = self._descriptors
    return Result(r, registered)

def _serial(self, mols, nmols, quiet, ipynb, id):
with self._progress(quiet, nmols, ipynb) as bar:
for m in mols:
with Capture() as capture:
r = list(self._calculate(Context.from_calculator(self, m, id)))
r = self._wrap_result(self._calculate(Context.from_calculator(self, m, id)))

for e in capture.result:
e = e.rstrip()
Expand Down Expand Up @@ -314,7 +319,7 @@ def map(self, mols, nproc=None, nmols=None, quiet=False, ipynb=False, id=-1):
id(int): conformer id to use. default: -1.
Returns:
Iterator[scalar]
Iterator[Result[scalar]]
"""
if hasattr(mols, "__len__"):
Expand All @@ -334,9 +339,15 @@ def pandas(self, mols, nproc=None, nmols=None, quiet=False, ipynb=False, id=-1):
"""
import pandas

if isinstance(mols, pandas.Series):
index = mols.index
else:
index = None

return pandas.DataFrame(
self.map(mols, nproc, nmols, quiet, ipynb, id),
(list(r) for r in self.map(mols, nproc, nmols, quiet, ipynb, id)),
columns=[str(d) for d in self.descriptors],
index=index,
)


Expand Down Expand Up @@ -373,11 +384,4 @@ def get_descriptors_from_module(mdl, submodule=False):
if is_descriptor_class(fn)
]

def key_by_def(d):
try:
return getsourcelines(d)[1]
except IOError:
return sys.maxsize

descs.sort(key=key_by_def)
return descs
2 changes: 1 addition & 1 deletion mordred/_base/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def do_task(mol):

bar.write(e)

yield r
yield self._wrap_result(r)
bar.update()

finally:
Expand Down
Loading

0 comments on commit bf6c8a9

Please sign in to comment.