From 2d164027177e684ff4894085adb2407af7a91ba0 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 27 Sep 2021 14:15:26 -0400 Subject: [PATCH 1/9] Add chemidplus --- src/bioversions/sources/__init__.py | 2 ++ src/bioversions/sources/chemidplus.py | 40 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 src/bioversions/sources/chemidplus.py diff --git a/src/bioversions/sources/__init__.py b/src/bioversions/sources/__init__.py index b4868b26..344d5629 100644 --- a/src/bioversions/sources/__init__.py +++ b/src/bioversions/sources/__init__.py @@ -12,6 +12,7 @@ from .biofacquim import BiofacquimGetter from .biogrid import BioGRIDGetter from .chembl import ChEMBLGetter +from .chemidplus import ChemIDplusGetter from .complexportal import ComplexPortalGetter from .daily import NCBIGeneGetter from .dgi import DGIGetter @@ -91,6 +92,7 @@ def get_getters() -> List[Type[Getter]]: ZfinGetter, NCItGetter, RxNormGetter, + ChemIDplusGetter, ] getters.extend(iter_obo_getters()) extend_ols_getters(getters) diff --git a/src/bioversions/sources/chemidplus.py b/src/bioversions/sources/chemidplus.py new file mode 100644 index 00000000..ec18bc48 --- /dev/null +++ b/src/bioversions/sources/chemidplus.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +"""A getter for ChemIDplus.""" + +import re + +import requests + +from bioversions.utils import Getter, VersionType + +__all__ = [ + "ChemIDplusGetter", +] + +RELEASE_PREFIX = "* Release:" +DATE_PREFIX = "* Date:" + + +class ChemIDplusGetter(Getter): + """A getter for ChemIDplus.""" + + bioregistry_id = "chemidplus" + name = "ChemIDplus" + homepage_fmt = "https://ftp.nlm.nih.gov/projects/chemidlease/chem.xm.{version}.zip" + version_type = VersionType.date + + def get(self): + """Get the latest ChemIDplus version number.""" + latest_url = "https://ftp.nlm.nih.gov/projects/chemidlease/CurrentChemID.xml" + headers = {"Range": "bytes=0-300"} # leave some slack to capture date + r = requests.get(latest_url, 
headers=headers) + print(r.status_code) + if r.status_code == 206: + result = re.search(r" date=\"([0-9]{4}-[0-9]{2}-[0-9]{2})\">", r.text) + if result: + return result.groups()[0] + + +if __name__ == "__main__": + ChemIDplusGetter.print() From 150e05b2fa72d115aa40ceb5f4074c5330a6baf0 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 28 Sep 2021 22:40:16 -0400 Subject: [PATCH 2/9] Style fixes, add fmt params, raise ValueError on failure --- src/bioversions/sources/chemidplus.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bioversions/sources/chemidplus.py b/src/bioversions/sources/chemidplus.py index ec18bc48..3c555076 100644 --- a/src/bioversions/sources/chemidplus.py +++ b/src/bioversions/sources/chemidplus.py @@ -12,16 +12,14 @@ "ChemIDplusGetter", ] -RELEASE_PREFIX = "* Release:" -DATE_PREFIX = "* Date:" - class ChemIDplusGetter(Getter): """A getter for ChemIDplus.""" bioregistry_id = "chemidplus" name = "ChemIDplus" - homepage_fmt = "https://ftp.nlm.nih.gov/projects/chemidlease/chem.xm.{version}.zip" + date_version_fmt = "%Y-%m-%d" + homepage_fmt = "https://ftp.nlm.nih.gov/projects/chemidlease/chem.xml.{version}.zip" version_type = VersionType.date def get(self): @@ -29,11 +27,16 @@ def get(self): latest_url = "https://ftp.nlm.nih.gov/projects/chemidlease/CurrentChemID.xml" headers = {"Range": "bytes=0-300"} # leave some slack to capture date r = requests.get(latest_url, headers=headers) - print(r.status_code) if r.status_code == 206: result = re.search(r" date=\"([0-9]{4}-[0-9]{2}-[0-9]{2})\">", r.text) if result: return result.groups()[0] + raise ValueError + + @staticmethod + def homepage_version_transform(version: str) -> str: + """Remove dashes from the version to match the ChemIDplus download file name format.""" + return version.replace("-", "") if __name__ == "__main__": From 9fbcade4ff6458b6ab753913f4d734b28dea4533 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 3 Oct 2021 16:21:10 -0400 Subject: [PATCH 3/9] add Guide to 
Pharmacology --- src/bioversions/sources/__init__.py | 2 ++ .../sources/guidetopharmacology.py | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 src/bioversions/sources/guidetopharmacology.py diff --git a/src/bioversions/sources/__init__.py b/src/bioversions/sources/__init__.py index 344d5629..1631f8a8 100644 --- a/src/bioversions/sources/__init__.py +++ b/src/bioversions/sources/__init__.py @@ -21,6 +21,7 @@ from .drugcentral import DrugCentralGetter from .expasy import ExPASyGetter from .flybase import FlybaseGetter +from .guidetopharmacology import GuideToPharmacologyGetter from .homologene import HomoloGeneGetter from .intact import IntActGetter from .interpro import InterProGetter @@ -93,6 +94,7 @@ def get_getters() -> List[Type[Getter]]: NCItGetter, RxNormGetter, ChemIDplusGetter, + GuideToPharmacologyGetter, ] getters.extend(iter_obo_getters()) extend_ols_getters(getters) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py new file mode 100644 index 00000000..86cbe4ab --- /dev/null +++ b/src/bioversions/sources/guidetopharmacology.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +"""A getter for GuideToPharmacology.""" + +import re +from datetime import datetime +from typing import Dict + +from bioversions.utils import Getter, VersionType, get_soup + +__all__ = [ + "GuideToPharmacologyGetter", +] + + +class GuideToPharmacologyGetter(Getter): + """A getter for the IUPHAR Guide to Pharmacology.""" + + bioregistry_id = "iuphar" + name = "Guide to Pharmacology" + homepage_fmt = "https://www.guidetopharmacology.org/DATA/public_iuphardb_v{version}.zip" + date_fmt = "%Y-%m-%d" + version_type = VersionType.other + + def get(self) -> Dict[str, str]: + """Get the latest Guide to Pharmacology version number.""" + downloads_url = "https://www.guidetopharmacology.org/download.jsp" + soup = get_soup(downloads_url) + text = soup.findAll("div", {"class": 
"contentboxfullhelp"})[4].div.ul.li.a.text + grps = re.search(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$", text).groups() + date = datetime.strftime(datetime.strptime(grps[1], "%d/%m/%y"), self.date_fmt) + return {"version": grps[0], "date": date} + + +if __name__ == "__main__": + GuideToPharmacologyGetter.print() From 250e4624bb3c490fd061f581cdabcfb3d3dac9c2 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 3 Oct 2021 17:15:46 -0400 Subject: [PATCH 4/9] Raise explicit Exception if version extraction from text fails --- src/bioversions/sources/guidetopharmacology.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index 86cbe4ab..4341a46e 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -27,7 +27,13 @@ def get(self) -> Dict[str, str]: downloads_url = "https://www.guidetopharmacology.org/download.jsp" soup = get_soup(downloads_url) text = soup.findAll("div", {"class": "contentboxfullhelp"})[4].div.ul.li.a.text - grps = re.search(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$", text).groups() + search = re.search(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$", text) + if search: + grps = search.groups() + else: + raise ValueError( + "Unable to extract version/date from Guide to Pharmacology Downloads page." 
+ ) date = datetime.strftime(datetime.strptime(grps[1], "%d/%m/%y"), self.date_fmt) return {"version": grps[0], "date": date} From 7e702404f8160d1de232997fa2d372a1d04897df Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Oct 2021 00:36:17 +0200 Subject: [PATCH 5/9] Update guidetopharmacology.py --- src/bioversions/sources/guidetopharmacology.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index 4341a46e..8cbe9e4c 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -16,7 +16,6 @@ class GuideToPharmacologyGetter(Getter): """A getter for the IUPHAR Guide to Pharmacology.""" - bioregistry_id = "iuphar" name = "Guide to Pharmacology" homepage_fmt = "https://www.guidetopharmacology.org/DATA/public_iuphardb_v{version}.zip" date_fmt = "%Y-%m-%d" From f6db39b2654611a376355368035239510bbda371 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Oct 2021 00:37:50 +0200 Subject: [PATCH 6/9] Precompile regex --- src/bioversions/sources/guidetopharmacology.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index 8cbe9e4c..9894f5e3 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -12,6 +12,8 @@ "GuideToPharmacologyGetter", ] +RE = re.compile(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$") + class GuideToPharmacologyGetter(Getter): """A getter for the IUPHAR Guide to Pharmacology.""" @@ -26,7 +28,7 @@ def get(self) -> Dict[str, str]: downloads_url = "https://www.guidetopharmacology.org/download.jsp" soup = get_soup(downloads_url) text = soup.findAll("div", {"class": "contentboxfullhelp"})[4].div.ul.li.a.text - search = re.search(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$", text) + search = RE.search(text) if 
search: grps = search.groups() else: From 7ed964d60d4a39b616f6adf353ef402198f121b9 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Oct 2021 00:38:06 +0200 Subject: [PATCH 7/9] Fail fast and decrease unnecessary indents --- src/bioversions/sources/guidetopharmacology.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index 9894f5e3..33dd30c2 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -29,12 +29,11 @@ def get(self) -> Dict[str, str]: soup = get_soup(downloads_url) text = soup.findAll("div", {"class": "contentboxfullhelp"})[4].div.ul.li.a.text search = RE.search(text) - if search: - grps = search.groups() - else: + if not search: raise ValueError( "Unable to extract version/date from Guide to Pharmacology Downloads page." ) + grps = search.groups() date = datetime.strftime(datetime.strptime(grps[1], "%d/%m/%y"), self.date_fmt) return {"version": grps[0], "date": date} From c7e6ffd3a2077d6b25fbcdfc1f41d638abf39d49 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Oct 2021 00:38:36 +0200 Subject: [PATCH 8/9] Make constant available in case someone wants it --- src/bioversions/sources/guidetopharmacology.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index 33dd30c2..df29bb74 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -12,6 +12,7 @@ "GuideToPharmacologyGetter", ] +URL = "https://www.guidetopharmacology.org/download.jsp" RE = re.compile(r"^.*(\d{4}\.\d+).*(\d{2}\/\d{2}\/\d{2}).*$") @@ -25,8 +26,7 @@ class GuideToPharmacologyGetter(Getter): def get(self) -> Dict[str, str]: """Get the latest Guide to Pharmacology version number.""" - downloads_url = 
"https://www.guidetopharmacology.org/download.jsp" - soup = get_soup(downloads_url) + soup = get_soup(URL) text = soup.findAll("div", {"class": "contentboxfullhelp"})[4].div.ul.li.a.text search = RE.search(text) if not search: From 484cafae9d8fea11c042f93cfc17df506012ea0a Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 4 Oct 2021 00:40:02 +0200 Subject: [PATCH 9/9] Add new version type --- src/bioversions/sources/guidetopharmacology.py | 2 +- src/bioversions/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bioversions/sources/guidetopharmacology.py b/src/bioversions/sources/guidetopharmacology.py index df29bb74..daf003f4 100644 --- a/src/bioversions/sources/guidetopharmacology.py +++ b/src/bioversions/sources/guidetopharmacology.py @@ -22,7 +22,7 @@ class GuideToPharmacologyGetter(Getter): name = "Guide to Pharmacology" homepage_fmt = "https://www.guidetopharmacology.org/DATA/public_iuphardb_v{version}.zip" date_fmt = "%Y-%m-%d" - version_type = VersionType.other + version_type = VersionType.year_minor def get(self) -> Dict[str, str]: """Get the latest Guide to Pharmacology version number.""" diff --git a/src/bioversions/utils.py b/src/bioversions/utils.py index c0ab25aa..c9513b27 100644 --- a/src/bioversions/utils.py +++ b/src/bioversions/utils.py @@ -31,6 +31,7 @@ class VersionType(enum.Enum): date = "CalVer (YYYY-MM-DD)" month = "CalVer (YYYY-MM)" year = "CalVer (YYYY)" + year_minor = "CalVer (YYYY.X)" semver_minor = "SemVer (X.Y)" sequential = "Sequential (X)" daily = "Daily"