biopragmatics · cthoyt · Sep 22, 2021 · Sep 19, 2021 · Sep 19, 2021 · Sep 21, 2021
diff --git a/src/bioversions/sources/__init__.py b/src/bioversions/sources/__init__.py
@@ -27,6 +27,7 @@
 from .mesh import MeshGetter
 from .mirbase import MirbaseGetter
 from .msigdb import MSigDBGetter
+from .ncit import NCItGetter
 from .npass import NPASSGetter
 from .obo import iter_obo_getters
 from .ols import extend_ols_getters
@@ -87,6 +88,7 @@ def get_getters() -> List[Type[Getter]]:
         PombaseGetter,
         SgdGetter,
         ZfinGetter,
+        NCItGetter,
     ]
     getters.extend(iter_obo_getters())
     extend_ols_getters(getters)

diff --git a/src/bioversions/sources/ncit.py b/src/bioversions/sources/ncit.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+"""A getter for the NCI Thesaurus."""
+
+import re
+from typing import Dict
+
+from ..utils import Getter, VersionType, get_soup
+
+__all__ = [
+    "NCItGetter",
+]
+
+URL = "https://ncithesaurus.nci.nih.gov/ncitbrowser/"  # page scraped by NCItGetter.get
+PATTERN = re.compile(  # group 1 captures the version, group 2 the release date
+    r"Version:([0-9]{2}\.[0-9]{2}[a-z]) \(Release date:([0-9]{4}-[0-9]{2}-[0-9]{2})"
+)
+
+
+class NCItGetter(Getter):
+    """A getter for the NCI Thesaurus, scraped from the NCIt browser page at ``URL``."""
+
+    bioregistry_id = "ncit"
+    name = "National Cancer Institute Thesaurus"
+    date_fmt = "%Y-%m-%d"  # same YYYY-MM-DD shape as the date captured by PATTERN
+    version_type = VersionType.other
+
+    def get(self) -> Dict[str, str]:
+        """Get the latest NCIt version and release date, raising ValueError if not found."""
+        span = get_soup(URL).find("span", {"class": "vocabularynamelong_ncit"})
+        # find() yields None for a missing span and search() yields None for unmatched text.
+        match = PATTERN.search(span.get_text()) if span is not None else None
+        if match is None:
+            raise ValueError("could not parse the NCIt version and release date from " + URL)
+        version, date = match.groups()
+        return {"version": version, "date": date}
+
+
+if __name__ == "__main__":  # executed only when this module is run as a script
+    NCItGetter.print()  # print() is not defined in this file; presumably inherited from Getter