diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 951b6b5..fc89419 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,12 @@ +**VERSION 2.3.0 (06/10/2020)** + +- Fixes ZINC URL in `lookup_smile_str.py` +- Adds an optional command line parameter (with arguments `zinc15` or `zinc12`) for `lookup_smile_str.py` + +**VERSION 2.2.0** + +- Provides an optional command line argument (zinc15) to use ZINC15 as a backend for downloading SMILES + **VERSION 2.1.0** - Functions and scripts to fetch ZINC IDs corresponding to a SMILE string query diff --git a/README.md b/README.md index d47ba77..c2a771d 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,11 @@ An example of the CSV file contents opened in an spreadsheet program is shown in # Changelog +**VERSION 2.3.0 (06/10/2020)** + +- Fixes ZINC URL in `lookup_smile_str.py` +- Adds an optional command line parameter (with arguments `zinc15` or `zinc12`) for `lookup_smile_str.py` + **VERSION 2.2.0** * Provides an optional command line argument (zinc15) to use ZINC15 as a backend for downloading SMILES diff --git a/scripts/cmd_line_online_query_scripts/lookup_smile_str.py b/scripts/cmd_line_online_query_scripts/lookup_smile_str.py index 954d131..c854020 100644 --- a/scripts/cmd_line_online_query_scripts/lookup_smile_str.py +++ b/scripts/cmd_line_online_query_scripts/lookup_smile_str.py @@ -24,17 +24,27 @@ def print_usage(): - print('\nUSAGE: python3 ookup_smile_str.py SMILE_str') - print('\n\nEXAMPLE (retrieve data from ZINC):\n' + print('\nUSAGE: python3 lookup_smile_str.py SMILE_str Backend') + print('\n\nUses zinc15 as backend by default' + '\n\nEXAMPLE 1 (retrieve data from ZINC12):\n' 'python3 lookup_smile_str.py' - ' C[C@H]1CCCC[NH+]1CC#CC(c2ccccc2)(c3ccccc3)O"') + ' C[C@H]1CCCC[NH+]1CC#CC(c2ccccc2)(c3ccccc3)O"' + '\n\nEXAMPLE 2 (retrieve data from ZINC12):\n' + 'python3 lookup_smile_str.py' + ' CCOc1ccc(cc1)N([C@@H](C)C(=O)Nc2ccc(cc2C)Cl)S(=O)(=O)C" zinc12') zinc_id = [None] try: smile_str = sys.argv[1] - zinc_ids = smilite.get_zincid_from_smile(smile_str) + + if len(sys.argv) >= 3: + backend = sys.argv[2] + else: + backend = 'zinc15' + + zinc_ids = smilite.get_zincid_from_smile(smile_str, backend=backend) for zid in zinc_ids: print(zid) diff --git a/scripts/cmd_line_online_query_scripts/lookup_zincid.py b/scripts/cmd_line_online_query_scripts/lookup_zincid.py index aace2b4..f05c641 100644 --- a/scripts/cmd_line_online_query_scripts/lookup_zincid.py +++ b/scripts/cmd_line_online_query_scripts/lookup_zincid.py @@ -28,7 +28,7 @@ def print_usage(): - print('\nUSAGE: python3 lookup_zincid.py ZINC_ID [zinc12 (default)/zinc15]') + print('\nUSAGE: python3 lookup_zincid.py ZINC_ID [zinc12 (def.) / zinc15]') print('\n\nEXAMPLES (retrieve data from ZINC):\n' '1) python3 lookup_zincid.py ZINC01234567 zinc12\n' '2) python3 lookup_zincid.py ZINC01234567 zinc15') diff --git a/setup.py b/setup.py index a5b95cc..a9b427c 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='smilite', - version='2.2.0', + version='2.3.0', description='smilite is a Python module to download' ' and analyze SMILE strings', author='Sebastian Raschka', @@ -18,7 +18,6 @@ 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Development Status :: 5 - Production/Stable', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 2.7', 'Environment :: Console', ], long_description=""" @@ -28,7 +27,6 @@ of chemical compounds from ZINC (a free database of commercially-available compounds for virtual screening, http://zinc.docking.org). -Now supports both Python 3.x and Python 2.x. Source repository: https://github.com/rasbt/smilite diff --git a/smilite/__init__.py b/smilite/__init__.py index 833c9d1..624b8ce 100755 --- a/smilite/__init__.py +++ b/smilite/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2014 Sebastian Raschka +# Copyright 2014-2020 Sebastian Raschka # # A small module to retrieve SMILE strings # (Simplified molecular-input line-entry system) from the ZINC online @@ -18,5 +18,5 @@ from .smilite import sqlite_to_csv from .smilite import get_zincid_from_smile -__version__ = '2.1.1' +__version__ = '2.3.0' diff --git a/smilite/smilite.py b/smilite/smilite.py index acae0d1..161dcc0 100755 --- a/smilite/smilite.py +++ b/smilite/smilite.py @@ -1,4 +1,4 @@ -# Copyright 2014 Sebastian Raschka +# Copyright 2014-2020 Sebastian Raschka # # smilite is a Python module to download and analyze SMILE strings # (Simplified Molecular-Input Line-entry System) of chemical compounds @@ -82,7 +82,7 @@ def get_zinc_smile(zinc_id, backend='zinc12'): return smile_str -def get_zincid_from_smile(smile_str): +def get_zincid_from_smile(smile_str, backend='zinc15'): """ Gets the corresponding ZINC ID(s) for a SMILE string query from the ZINC online database. Requires an internet connection. @@ -90,42 +90,67 @@ def get_zincid_from_smile(smile_str): Keyword arguments: smile_str (str): A valid SMILE string, e.g., C[C@H]1CCCC[NH+]1CC#CC(c2ccccc2)(c3ccccc3)O' + backend (str): Specifies the database backend, "zinc12" or "zinc15" Returns the SMILE string for the corresponding ZINC ID(s) in a list. E.g., ['ZINC01234567', 'ZINC01234568', 'ZINC01242053', 'ZINC01242055'] """ + + if backend not in {'zinc12', 'zinc15'}: + raise ValueError("backend must be 'zinc12' or 'zinc15'") + stripped_smile = smile_str.strip() + encoded_smile = urllib.parse.quote(stripped_smile) + + if backend == 'zinc12': + url_part1 = 'http://zinc12.docking.org/results?structure.smiles=' + url_part3 = '&structure.similarity=1.0' + elif backend == 'zinc15': + url_part1 = 'http://zinc.docking.org/substances/search/?q=' + url_part3 = '' + else: + raise ValueError("Backend must be 'zinc12' or 'zinc15'. " + "Got %s" % (backend)) zinc_ids = [] - url_part1 = 'http://zinc.docking.org/results?structure.smiles=' - url_part3 = '&structure.similarity=1.0' + try: if sys.version_info[0] == 3: - smile_url = urllib.request.pathname2url(stripped_smile) + #smile_url = urllib.request.pathname2url(encoded_smile) response = urllib.request.urlopen('{}{}{}' .format(url_part1, - smile_url, + encoded_smile, url_part3)) else: - smile_url = urllib.pathname2url(stripped_smile) + #smile_url = urllib.pathname2url(encoded_smile) response = urllib.urlopen('{}{}{}' .format(url_part1, - smile_url, + encoded_smile, url_part3)) except urllib.error.HTTPError: print('Invalid SMILE string {}'.format(smile_str)) response = [] for line in response: line = line.decode(encoding='UTF-8').strip() - if line.startswith('