From a245c8c59ac4aa4259f35237ec9fb5f7b4a2d689 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 22 Mar 2024 11:19:11 -0400 Subject: [PATCH 1/3] feat: add function to fetch curated ontology term lists --- .github/workflows/release.yml | 2 + .../curated_ontology_term_lists.py | 21 ++++++++++ .../src/cellxgene_ontology_guide/entities.py | 13 ++++++ .../ontology_parser.py | 10 +++-- .../tests/test_curated_ontology_term_lists.py | 41 +++++++++++++++++++ api/python/tests/test_ontology_parser.py | 8 ++++ .../uberon_development_stage_schema.json | 9 ++++ 7 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py create mode 100644 api/python/tests/test_curated_ontology_term_lists.py create mode 100644 artifact-schemas/uberon_development_stage_schema.json diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 643bb7fc..005eb539 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -42,6 +42,8 @@ jobs: content_type: "application/json" - file_name: "system_list.json" content_type: "application/json" + - file_name: "uberon_development_stage.json" + content_type: "application/json" steps: - name: Checkout main branch uses: actions/checkout@v4 diff --git a/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py b/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py new file mode 100644 index 00000000..3883c506 --- /dev/null +++ b/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py @@ -0,0 +1,21 @@ +import functools +import json +import os + +from typing import List + +from cellxgene_ontology_guide._constants import DATA_ROOT +from cellxgene_ontology_guide.entities import CuratedOntologyTermList + + +@functools.cache +def get_curated_ontology_term_list(curated_ontology_term_list: CuratedOntologyTermList) -> List[str]: + """ + Get the list of curated ontology terms for the given curated_ontology_term_list. + + :param curated_ontology_term_list: Enum attribute representing the curated ontology term list + :return: List[str] of ontology term IDs + """ + filename = f"{curated_ontology_term_list.value}_list.json" + with open(os.path.join(DATA_ROOT, filename)) as f: + return json.load(f) diff --git a/api/python/src/cellxgene_ontology_guide/entities.py b/api/python/src/cellxgene_ontology_guide/entities.py index 871f77fd..c7920978 100644 --- a/api/python/src/cellxgene_ontology_guide/entities.py +++ b/api/python/src/cellxgene_ontology_guide/entities.py @@ -15,3 +15,16 @@ class Ontology(Enum): MmusDv = "mmusdv" PATO = "pato" NCBITaxon = "ncbitaxon" + + +class CuratedOntologyTermList(Enum): + """ + Enum for the set of curated ontology term lists supported by CZ CellXGene + """ + + CELL_CLASS = "cell_class" + CELL_SUBCLASS = "cell_subclass" + ORGAN = "organ" + SYSTEM = "system" + TISSUE_GENERAL = "tissue_general" + UBERON_DEVELOPMENT_STAGE = "uberon_development_stage" \ No newline at end of file diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index 2b6a69ef..1e1a3206 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -1,5 +1,5 @@ import re -from typing import Any, Dict, Iterable, List, Union +from typing import Any, Dict, Iterable, List, Optional, Union from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS from cellxgene_ontology_guide.entities import Ontology @@ -39,16 +39,20 @@ def _parse_ontology_name(self, term_id: str) -> str: return ontology_name - def is_valid_term_id(self, term_id: str) -> bool: + def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: """ Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined - in the ontology, it is considered valid. + in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine + if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology :param term_id: str ontology term to check + :param ontology: str name of ontology to check against :return: boolean flag indicating whether the term is supported """ try: ontology_name = self._parse_ontology_name(term_id) + if ontology and ontology_name != ontology: + return False if term_id in self.cxg_schema.ontology(ontology_name): return True except ValueError: diff --git a/api/python/tests/test_curated_ontology_term_lists.py b/api/python/tests/test_curated_ontology_term_lists.py new file mode 100644 index 00000000..80f86a10 --- /dev/null +++ b/api/python/tests/test_curated_ontology_term_lists.py @@ -0,0 +1,41 @@ +import json +import pytest + +from unittest.mock import patch + +from cellxgene_ontology_guide.entities import CuratedOntologyTermList +from cellxgene_ontology_guide.curated_ontology_term_lists import get_curated_ontology_term_list + +MODULE_PATH = "cellxgene_ontology_guide.curated_ontology_term_lists" + + +@pytest.fixture +def mock_curated_ontology_term_list_file(tmpdir): + with patch(f"{MODULE_PATH}.DATA_ROOT", tmpdir): + test_file_name = "cell_class_list.json" + test_enum = CuratedOntologyTermList.CELL_CLASS + onto_file = tmpdir.join(test_file_name) + file_contents = ["cell class 1", "cell class 2"] + with open(str(onto_file), "wt") as onto_file: + json.dump(file_contents, onto_file) + yield test_enum, file_contents + + +def test_get_curated_ontology_term_list(mock_curated_ontology_term_list_file): + test_enum, file_contents = mock_curated_ontology_term_list_file + assert get_curated_ontology_term_list(test_enum) == file_contents + assert get_curated_ontology_term_list.cache_info().hits == 0 + assert get_curated_ontology_term_list.cache_info().misses == 1 + get_curated_ontology_term_list(test_enum) + assert get_curated_ontology_term_list.cache_info().hits == 1 + assert get_curated_ontology_term_list.cache_info().misses == 1 + + +def test__clear_curated_ontology_term_list_cache(mock_curated_ontology_term_list_file): + test_enum, _ = mock_curated_ontology_term_list_file + get_curated_ontology_term_list(test_enum) + assert get_curated_ontology_term_list.cache_info().misses == 1 + get_curated_ontology_term_list.cache_clear() + assert get_curated_ontology_term_list.cache_info().misses == 0 + get_curated_ontology_term_list(test_enum) + assert get_curated_ontology_term_list.cache_info().misses == 1 \ No newline at end of file diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index 3e8d4e4d..f4282c8b 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -80,6 +80,14 @@ def test_is_valid_term_id(ontology_parser, term_id, expected): assert ontology_parser.is_valid_term_id(term_id) == expected +@pytest.mark.parametrize( + "term_id,ontology,expected", + [("CL:0000001", "CL", True), ("CL:0000001", "UBERON", False), ("GO:0000001", "GO", False)], +) +def test_is_valid_term_id__with_ontology(ontology_parser, term_id, ontology, expected): + assert ontology_parser.is_valid_term_id(term_id, ontology) == expected + + def test_get_term_ancestors(ontology_parser): assert ontology_parser.get_term_ancestors("CL:0000004") == ["CL:0000000", "CL:0000001", "CL:0000002"] assert ontology_parser.get_term_ancestors("CL:0000004", include_self=True) == [ diff --git a/artifact-schemas/uberon_development_stage_schema.json b/artifact-schemas/uberon_development_stage_schema.json new file mode 100644 index 00000000..867588bc --- /dev/null +++ b/artifact-schemas/uberon_development_stage_schema.json @@ -0,0 +1,9 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Curated 'Development Stage' UBERON Ontology Terms Schema", + "description": "A schema for validating an array of high-level UBERON ontology term IDs representing 'development stage' ontology terms, curated for CZ CellxGene use.", + "type": "array", + "items": {"$ref": "ontology_term_id_schema.json#/definitions/UBERON_term_id"}, + "minItems": 1, + "uniqueItems": true +} \ No newline at end of file From 4749247b13137767b8f9c270cdfe1baa15154707 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 22 Mar 2024 11:33:27 -0400 Subject: [PATCH 2/3] lint + add uberon development stage list --- .../curated_ontology_term_lists.py | 3 +- .../src/cellxgene_ontology_guide/entities.py | 2 +- .../tests/test_curated_ontology_term_lists.py | 7 ++- .../uberon_development_stage_list.json | 50 +++++++++++++++++++ 4 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 ontology-assets/uberon_development_stage_list.json diff --git a/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py b/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py index 3883c506..4f26ee3a 100644 --- a/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py +++ b/api/python/src/cellxgene_ontology_guide/curated_ontology_term_lists.py @@ -1,7 +1,6 @@ import functools import json import os - from typing import List from cellxgene_ontology_guide._constants import DATA_ROOT @@ -18,4 +17,4 @@ def get_curated_ontology_term_list(curated_ontology_term_list: CuratedOntologyTe """ filename = f"{curated_ontology_term_list.value}_list.json" with open(os.path.join(DATA_ROOT, filename)) as f: - return json.load(f) + return json.load(f) # type: ignore diff --git a/api/python/src/cellxgene_ontology_guide/entities.py b/api/python/src/cellxgene_ontology_guide/entities.py index c7920978..a26b1b04 100644 --- a/api/python/src/cellxgene_ontology_guide/entities.py +++ b/api/python/src/cellxgene_ontology_guide/entities.py @@ -27,4 +27,4 @@ class CuratedOntologyTermList(Enum): ORGAN = "organ" SYSTEM = "system" TISSUE_GENERAL = "tissue_general" - UBERON_DEVELOPMENT_STAGE = "uberon_development_stage" \ No newline at end of file + UBERON_DEVELOPMENT_STAGE = "uberon_development_stage" diff --git a/api/python/tests/test_curated_ontology_term_lists.py b/api/python/tests/test_curated_ontology_term_lists.py index 80f86a10..9f698504 100644 --- a/api/python/tests/test_curated_ontology_term_lists.py +++ b/api/python/tests/test_curated_ontology_term_lists.py @@ -1,10 +1,9 @@ import json -import pytest - from unittest.mock import patch -from cellxgene_ontology_guide.entities import CuratedOntologyTermList +import pytest from cellxgene_ontology_guide.curated_ontology_term_lists import get_curated_ontology_term_list +from cellxgene_ontology_guide.entities import CuratedOntologyTermList MODULE_PATH = "cellxgene_ontology_guide.curated_ontology_term_lists" @@ -38,4 +37,4 @@ def test__clear_curated_ontology_term_list_cache(mock_curated_ontology_term_list get_curated_ontology_term_list.cache_clear() assert get_curated_ontology_term_list.cache_info().misses == 0 get_curated_ontology_term_list(test_enum) - assert get_curated_ontology_term_list.cache_info().misses == 1 \ No newline at end of file + assert get_curated_ontology_term_list.cache_info().misses == 1 diff --git a/ontology-assets/uberon_development_stage_list.json b/ontology-assets/uberon_development_stage_list.json new file mode 100644 index 00000000..6ad9c412 --- /dev/null +++ b/ontology-assets/uberon_development_stage_list.json @@ -0,0 +1,50 @@ +[ + "UBERON:0007236", + "UBERON:0000106", + "UBERON:0014859", + "UBERON:0008264", + "UBERON:0007233", + "UBERON:0000112", + "UBERON:8000003", + "UBERON:0014857", + "UBERON:0009849", + "UBERON:0034920", + "UBERON:0000069", + "UBERON:0000109", + "UBERON:8000001", + "UBERON:0000068", + "UBERON:0018685", + "UBERON:0000107", + "UBERON:0007222", + "UBERON:0000092", + "UBERON:0018378", + "UBERON:0014864", + "UBERON:0004730", + "UBERON:0000111", + "UBERON:0007220", + "UBERON:0014405", + "UBERON:0014862", + "UBERON:8000000", + "UBERON:0000071", + "UBERON:0014860", + "UBERON:0012101", + "UBERON:0000113", + "UBERON:0014858", + "UBERON:0007232", + "UBERON:0000070", + "UBERON:0000110", + "UBERON:8000002", + "UBERON:0014856", + "UBERON:0004728", + "UBERON:0034919", + "UBERON:0000108", + "UBERON:0000066", + "UBERON:0004707", + "UBERON:0000105", + "UBERON:0018241", + "UBERON:0007221", + "UBERON:0014406", + "UBERON:0014863", + "UBERON:0004729", + "UBERON:0014861" +] \ No newline at end of file From 05ac7b829a063dbe7f5b0548a466c321b989f155 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Fri, 22 Mar 2024 11:57:48 -0400 Subject: [PATCH 3/3] fix schema name --- ...tage_schema.json => uberon_development_stage_list_schema.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename artifact-schemas/{uberon_development_stage_schema.json => uberon_development_stage_list_schema.json} (100%) diff --git a/artifact-schemas/uberon_development_stage_schema.json b/artifact-schemas/uberon_development_stage_list_schema.json similarity index 100% rename from artifact-schemas/uberon_development_stage_schema.json rename to artifact-schemas/uberon_development_stage_list_schema.json