Skip to content

Commit

Permalink
Add new sources for CiVIC Gene and OMIM Phenotypic series (#176)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Apr 2, 2024
1 parent 178013c commit 9c486a2
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/pyobo/getters.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_ontology(

ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
if path is None:
raise NoBuild
raise NoBuild(prefix)
elif ontology_format == "obo":
pass # all gucci
elif ontology_format == "owl":
Expand Down
4 changes: 4 additions & 0 deletions src/pyobo/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .ccle import CCLEGetter
from .cgnc import CGNCGetter
from .chembl import ChEMBLCompoundGetter
from .civic_gene import CIVICGeneGetter
from .complexportal import ComplexPortalGetter
from .conso import CONSOGetter
from .cpt import CPTGetter
Expand Down Expand Up @@ -38,6 +39,7 @@
from .msigdb import MSigDBGetter
from .ncbigene import NCBIGeneGetter
from .npass import NPASSGetter
from .omim_ps import OMIMPSGetter
from .pathbank import PathBankGetter
from .pfam import PfamGetter
from .pfam_clan import PfamClanGetter
Expand All @@ -61,6 +63,7 @@
"AntibodyRegistryGetter",
"CCLEGetter",
"CGNCGetter",
"CIVICGeneGetter",
"CONSOGetter",
"CPTGetter",
"CVXGetter",
Expand Down Expand Up @@ -94,6 +97,7 @@
"MiRBaseMatureGetter",
"NCBIGeneGetter",
"NPASSGetter",
"OMIMPSGetter",
"PIDGetter",
"PathBankGetter",
"PfamClanGetter",
Expand Down
55 changes: 55 additions & 0 deletions src/pyobo/sources/civic_gene.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-

"""Converter for CiVIC Genes."""

from typing import Iterable, Optional

import pandas as pd

from pyobo.struct import Obo, Reference, Term
from pyobo.utils.path import ensure_df

__all__ = [
"CIVICGeneGetter",
]

# Prefix under which CiVIC gene identifiers are emitted by this module.
PREFIX = "civic.gid"
# Location of CiVIC's nightly gene summaries export (tab-separated values).
URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"


def _sort(_o, t):
return int(t.identifier)


class CIVICGeneGetter(Obo):
    """Expose CiVIC's gene records as an ontology."""

    # Both the ontology prefix and the bioversions lookup key are the CiVIC gene prefix.
    bioversions_key = PREFIX
    ontology = PREFIX
    # Order terms by the integer value of their identifiers.
    term_sort_key = _sort

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Yield one term per CiVIC gene for this ontology's data version.

        :param force: Forwarded unchanged to :func:`get_terms`.
        :yields: The terms produced by :func:`get_terms`.
        """
        for term in get_terms(self.data_version, force=force):
            yield term


def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
    """Get CIVIC terms.

    :param version: The release label used to build the download URL, formatted
        like ``01-Feb-2024``. When omitted, the nightly export at ``URL`` is
        downloaded instead.
    :param force: Forwarded unchanged to ``ensure_df``.
    :yields: One term per row of the gene summaries table, carrying the row's
        name, its description (when present) as the definition, and an exact
        match to the ``ncbigene`` prefix when an Entrez identifier is present.
    """
    if version is None:
        # Interpolating ``None`` into the versioned URL would request
        # ".../downloads/None/None-GeneSummaries.tsv", so use the nightly export.
        url = URL
    else:
        url = f"https://civicdb.org/downloads/{version}/{version}-GeneSummaries.tsv"
    df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
    # Each row is unpacked positionally, so the table must have exactly seven columns.
    for identifier, _, name, entrez_id, description, _last_review, _flag in df.values:
        term = Term(
            reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
            definition=description if pd.notna(description) else None,
        )
        # Empty cells are read as NaN; do not turn those into a bogus cross-reference.
        if pd.notna(entrez_id):
            term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
        yield term


# Run the getter's command line interface when this module is executed as a script.
if __name__ == "__main__":
    CIVICGeneGetter.cli()
39 changes: 39 additions & 0 deletions src/pyobo/sources/omim_ps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-

"""Converter for OMIM Phenotypic Series."""

import logging
from typing import Iterable

from bioversions.utils import get_soup

from pyobo.struct import Obo, Term

__all__ = [
"OMIMPSGetter",
]


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Prefix under which OMIM Phenotypic Series identifiers are emitted by this module.
PREFIX = "omim.ps"
# Page listing all phenotypic series titles; it is fetched and parsed as HTML.
URL = "https://omim.org/phenotypicSeriesTitles/all"


class OMIMPSGetter(Obo):
    """Expose OMIM Phenotypic Series as an ontology."""

    # Both the ontology prefix and the bioversions lookup key are the OMIM PS prefix.
    bioversions_key = PREFIX
    ontology = PREFIX

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Yield one term per table row of the phenotypic series listing page.

        The page is fetched with a browser-like user agent. Every ``tr`` in the
        body of the first table inside the ``mimContent`` element is read, and
        the link in its first cell supplies both the name (link text) and the
        identifier (the ``href`` with the leading path segment cut off).

        :param force: Accepted for interface compatibility; not used here.
        :yields: Terms built from each row's identifier and name.
        """
        # Number of leading characters of the href that precede the identifier.
        skip = len("/phenotypicSeries/")
        page = get_soup(URL, user_agent="Mozilla/5.0")
        content = page.find(id="mimContent")
        table_body = content.find("table").find("tbody")
        for table_row in table_body.find_all("tr"):
            first_cell = table_row.find("td")
            link = first_cell.find("a")
            label = link.text.strip()
            href = link.attrs["href"]
            yield Term.from_triple(PREFIX, href[skip:], label)


# Run the getter's command line interface when this module is executed as a script.
if __name__ == "__main__":
    OMIMPSGetter.cli()
2 changes: 1 addition & 1 deletion src/pyobo/xrefdb/sources/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#: WikiData SPARQL endpoint. See https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service#Interfacing
URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"

WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.submodule("wikidata", "mappings")
WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.module("wikidata", "mappings")


def get_wikidata_xrefs_df(*, use_tqdm: bool = True) -> pd.DataFrame:
Expand Down

0 comments on commit 9c486a2

Please sign in to comment.