Skip to content

Commit

Permalink
v 2.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Jun 10, 2020
1 parent c999cf3 commit ad801a0
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 26 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
**VERSION 2.3.0 (06/10/2020)**

- Fixes ZINC URL in `lookup_smile_str.py`
- Adds an optional command line argument to `lookup_smile_str.py` for selecting the backend (`zinc15` or `zinc12`; defaults to `zinc15`)

**VERSION 2.2.0**

- Provides an optional command line argument (zinc15) to use ZINC15 as a backend for downloading SMILES

**VERSION 2.1.0**

- Functions and scripts to fetch ZINC IDs corresponding to a SMILE string query
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,11 @@ An example of the CSV file contents opened in a spreadsheet program is shown in

# Changelog

**VERSION 2.3.0 (06/10/2020)**

- Fixes ZINC URL in `lookup_smile_str.py`
- Adds an optional command line argument to `lookup_smile_str.py` for selecting the backend (`zinc15` or `zinc12`; defaults to `zinc15`)

**VERSION 2.2.0**

* Provides an optional command line argument (zinc15) to use ZINC15 as a backend for downloading SMILES
Expand Down
18 changes: 14 additions & 4 deletions scripts/cmd_line_online_query_scripts/lookup_smile_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,27 @@


def print_usage():
    """Print command line usage help for lookup_smile_str.py to stdout.

    The backend argument is optional; the script falls back to zinc15
    when it is omitted, so the first example queries ZINC15.
    """
    print('\nUSAGE: python3 lookup_smile_str.py SMILE_str'
          ' [zinc15 (default) / zinc12]')
    # The SMILE strings are shown fully quoted because characters such as
    # '#', '(' and '[' are interpreted by most shells when left unquoted.
    print('\n\nUses zinc15 as backend by default'
          '\n\nEXAMPLE 1 (retrieve data from ZINC15):\n'
          'python3 lookup_smile_str.py'
          ' "C[C@H]1CCCC[NH+]1CC#CC(c2ccccc2)(c3ccccc3)O"'
          '\n\nEXAMPLE 2 (retrieve data from ZINC12):\n'
          'python3 lookup_smile_str.py'
          ' "CCOc1ccc(cc1)N([C@@H](C)C(=O)Nc2ccc(cc2C)Cl)S(=O)(=O)C" zinc12')


zinc_id = [None]

try:
smile_str = sys.argv[1]
zinc_ids = smilite.get_zincid_from_smile(smile_str)

if len(sys.argv) >= 3:
backend = sys.argv[2]
else:
backend = 'zinc15'

zinc_ids = smilite.get_zincid_from_smile(smile_str, backend=backend)
for zid in zinc_ids:
print(zid)

Expand Down
2 changes: 1 addition & 1 deletion scripts/cmd_line_online_query_scripts/lookup_zincid.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


def print_usage():
    """Print command line usage help for lookup_zincid.py to stdout."""
    # Only one USAGE line is emitted; the backend argument is optional.
    print('\nUSAGE: python3 lookup_zincid.py ZINC_ID [zinc12 (def.) / zinc15]')
    print('\n\nEXAMPLES (retrieve data from ZINC):\n'
          '1) python3 lookup_zincid.py ZINC01234567 zinc12\n'
          '2) python3 lookup_zincid.py ZINC01234567 zinc15')
Expand Down
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='smilite',
version='2.2.0',
version='2.3.0',
description='smilite is a Python module to download'
' and analyze SMILE strings',
author='Sebastian Raschka',
Expand All @@ -18,7 +18,6 @@
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
'Development Status :: 5 - Production/Stable',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 2.7',
'Environment :: Console',
],
long_description="""
Expand All @@ -28,7 +27,6 @@
of chemical compounds from ZINC
(a free database of commercially-available compounds for virtual screening,
http://zinc.docking.org).
Now supports both Python 3.x and Python 2.x.
Source repository: https://github.com/rasbt/smilite
Expand Down
4 changes: 2 additions & 2 deletions smilite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2014 Sebastian Raschka
# Copyright 2014-2020 Sebastian Raschka
#
# A small module to retrieve SMILE strings
# (Simplified molecular-input line-entry system) from the ZINC online
Expand All @@ -18,5 +18,5 @@
from .smilite import sqlite_to_csv
from .smilite import get_zincid_from_smile

__version__ = '2.1.1'
__version__ = '2.3.0'

57 changes: 41 additions & 16 deletions smilite/smilite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2014 Sebastian Raschka
# Copyright 2014-2020 Sebastian Raschka
#
# smilite is a Python module to download and analyze SMILE strings
# (Simplified Molecular-Input Line-entry System) of chemical compounds
Expand Down Expand Up @@ -82,50 +82,75 @@ def get_zinc_smile(zinc_id, backend='zinc12'):
return smile_str


def get_zincid_from_smile(smile_str, backend='zinc15'):
    """
    Gets the corresponding ZINC ID(s) for a SMILE string query from
    the ZINC online database. Requires an internet connection.

    Keyword arguments:
        smile_str (str): A valid SMILE string, e.g.,
            'C[C@H]1CCCC[NH+]1CC#CC(c2ccccc2)(c3ccccc3)O'
        backend (str): Specifies the database backend, "zinc12" or "zinc15"

    Returns the ZINC ID(s) found for the SMILE string in a list.
        E.g., ['ZINC01234567', 'ZINC01234568', 'ZINC01242053', 'ZINC01242055']
        If the server answers with an HTTP error, a message is printed
        and an empty list is returned.

    Raises ValueError if backend is neither "zinc12" nor "zinc15".

    """
    # Imported here so the function does not depend on which urllib
    # submodules the surrounding module happens to import.
    import urllib.error
    import urllib.parse
    import urllib.request

    # Validate the backend before any network access takes place.
    if backend == 'zinc12':
        url_prefix = 'http://zinc12.docking.org/results?structure.smiles='
        url_suffix = '&structure.similarity=1.0'
    elif backend == 'zinc15':
        url_prefix = 'http://zinc.docking.org/substances/search/?q='
        url_suffix = ''
    else:
        raise ValueError("Backend must be 'zinc12' or 'zinc15'. "
                         "Got {!r}".format(backend))

    # Percent-encode the query: SMILE strings contain characters such as
    # '#', '[' and '=' that are not valid verbatim inside a URL.
    encoded_smile = urllib.parse.quote(smile_str.strip())
    query_url = '{}{}{}'.format(url_prefix, encoded_smile, url_suffix)

    zinc_ids = []
    try:
        response = urllib.request.urlopen(query_url)
    except urllib.error.HTTPError:
        print('Invalid SMILE string {}'.format(smile_str))
        return zinc_ids

    # The response object is a context manager; closing it releases the
    # underlying connection once the result page has been scanned.
    with response:
        for raw_line in response:
            line = raw_line.decode(encoding='UTF-8').strip()

            if backend == 'zinc15':
                # Result links look like <a href="/substances/ZINC.../...;
                # the ID is the second-to-last '/'-separated field.
                if line.startswith('<a href="/substances/ZINC'):
                    zinc_ids.append(urllib.parse.unquote(line.split('/')[-2]))
            else:
                if line.startswith('<a href="//zinc.docking.org/substance/'):
                    # The link text holds the bare number; left-pad it to
                    # eight digits and add the 'ZINC' prefix.
                    link_text = line.split('</a>')[-2].split('>')[-1]
                    number = urllib.parse.unquote(link_text)
                    zinc_ids.append('ZINC' + number.rjust(8, '0'))
    return zinc_ids


Expand Down

0 comments on commit ad801a0

Please sign in to comment.