Skip to content

Commit

Permalink
Cache SO names (#199)
Browse files Browse the repository at this point in the history
A lot of resources (HGNC, PomBase, ZFIN) require SO names for
legibility, so this caches them
  • Loading branch information
cthoyt authored Nov 4, 2024
1 parent 4a5416f commit 723349b
Show file tree
Hide file tree
Showing 6 changed files with 2,676 additions and 7 deletions.
55 changes: 55 additions & 0 deletions src/pyobo/resources/so.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Loading of the relations ontology names."""

from __future__ import annotations

import csv
import os
from functools import lru_cache

import requests

# Public API of this module: the names exported by a star-import.
__all__ = [
    "get_so_name",
    "load_so",
]

# Absolute path of the directory that contains this module.
HERE = os.path.abspath(os.path.dirname(__file__))
# Tab-separated cache file stored next to this module; read by load_so() and
# (re)written by download_so().
SO_PATH = os.path.join(HERE, "so.tsv")
# Remote JSON document fetched by download_so(); it is read as
# ``["graphs"][0]["nodes"]``, each node carrying an "id" and optionally a "lbl".
SO_JSON_URL = "https://github.com/The-Sequence-Ontology/SO-Ontologies/raw/refs/heads/master/Ontology_Files/so-simple.json"
# Only nodes whose "id" starts with this prefix are kept by download_so(); the
# prefix is stripped to obtain the identifier stored in the cache file.
SO_URI_PREFIX = "http://purl.obolibrary.org/obo/SO_"


def get_so_name(so_id: str) -> str | None:
    """Look up the Sequence Ontology name for an identifier.

    :param so_id: The identifier as stored in the cache file, i.e. the part of
        the term URI that follows ``SO_URI_PREFIX``.
    :returns: The cached name, or ``None`` if the identifier is not in the
        cache.
    """
    so_names = load_so()
    return so_names.get(so_id)


@lru_cache(maxsize=1)
def load_so() -> dict[str, str]:
    """Load the Sequence Ontology names.

    The names are read from the tab-separated cache file at ``SO_PATH``. If
    that file does not exist yet, :func:`download_so` is called first to
    create it. The result is memoized with :func:`functools.lru_cache`, so
    repeated calls return the same dictionary object without re-reading the
    file.

    :returns: A dictionary mapping each identifier (first column) to its name
        (second column).
    """
    if not os.path.exists(SO_PATH):
        download_so()
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding; ``newline=""`` lets the csv module handle line
    # endings itself, as its documentation requires.
    with open(SO_PATH, encoding="utf-8", newline="") as file:
        return dict(csv.reader(file, delimiter="\t"))


def download_so() -> None:
    """Download the latest version of the Sequence Ontology and cache its names.

    Fetches the JSON document at ``SO_JSON_URL``, walks the nodes of its first
    graph, and keeps every node whose ``id`` starts with ``SO_URI_PREFIX`` and
    that has a non-empty ``lbl``. The resulting ``(identifier, name)`` rows are
    written to ``SO_PATH`` as tab-separated values, sorted numerically by
    identifier.

    :raises requests.HTTPError: If the server answers with an error status.
    :raises requests.Timeout: If the server does not respond in time.
    """
    # Without a timeout ``requests`` may block forever on a stalled connection.
    response = requests.get(SO_JSON_URL, timeout=60)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    res_json = response.json()

    rows = []
    for node in res_json["graphs"][0]["nodes"]:
        uri = node["id"]
        if not uri.startswith(SO_URI_PREFIX):
            continue
        identifier = uri.removeprefix(SO_URI_PREFIX)
        name = node.get("lbl")
        if name:
            rows.append((identifier, name))

    # Everything is collected before the file is opened, so a failed download
    # never truncates an existing cache file. UTF-8 keeps the output
    # independent of the platform locale; ``newline=""`` stops the csv writer's
    # line terminator from being translated a second time on Windows.
    with open(SO_PATH, "w", encoding="utf-8", newline="") as file:
        writer = csv.writer(file, delimiter="\t")
        writer.writerows(sorted(rows, key=lambda row: int(row[0])))


# When this module is executed as a script, (re)generate the cached names file
# by downloading the ontology.
if __name__ == "__main__":
    download_so()
Loading

0 comments on commit 723349b

Please sign in to comment.