-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: load GH Release Assets for schema version in memory (#72)
- Loading branch information
1 parent
10797fc
commit 58bad0a
Showing 17 changed files with 219 additions and 138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 29 additions & 12 deletions
41
api/python/src/cellxgene_ontology_guide/artifact_download.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,36 @@ | ||
import os | ||
from urllib.error import HTTPError, URLError | ||
from urllib.request import urlopen | ||
|
||
from constants import ARTIFACT_DIR, CURRENT_SCHEMA_VERSION | ||
from constants import ONTOLOGY_ASSET_RELEASE_URL, SCHEMA_VERSION_TO_ONTOLOGY_ASSET_TAG | ||
|
||
|
||
def load_artifact_by_schema(schema_version: str, filename: str) -> str: | ||
def load_artifact_by_schema(schema_version: str, filename: str) -> bytes: | ||
""" | ||
Load ontology files from GitHub Release Assets, based on the provided schema version. | ||
Returns ValueError if the schema version is not supported in this package version. | ||
Raises ValueError if the schema version is not supported in this package version or if filename is not found for | ||
the given schema_version. | ||
:param schema_version: str version of the schema to load ontology files for | ||
:param filename: str name of the file to load | ||
:return: str path to the ontology file | ||
:param schema_version: str version of the schema to load ontology assets for | ||
:param filename: str name of the asset to load | ||
:return: bytes content of the asset | ||
""" | ||
if schema_version == CURRENT_SCHEMA_VERSION: | ||
return os.path.join(ARTIFACT_DIR, filename) | ||
else: | ||
# TODO: Add support for loading ontology files from different schema versions | ||
raise ValueError(f"Schema version {schema_version} is not supported in this package version.") | ||
try: | ||
ontology_asset_tag = SCHEMA_VERSION_TO_ONTOLOGY_ASSET_TAG[schema_version] | ||
except KeyError as e: | ||
raise ValueError(f"Schema version {schema_version} is not supported in this package version.") from e | ||
|
||
download_url = f"{ONTOLOGY_ASSET_RELEASE_URL}/{ontology_asset_tag}/{filename}" | ||
|
||
try: | ||
with urlopen(download_url) as response: | ||
if response.status == 200: | ||
content: bytes = response.read() | ||
return content | ||
else: | ||
raise ValueError(f"Server responded with status code: {response.status}") | ||
except HTTPError as e: | ||
raise ValueError( | ||
f"Could not get {filename} for schema version {schema_version} in GitHub Release Assets: {e}" | ||
) from e | ||
except URLError as e: | ||
raise ValueError(f"URL error occurred: {e.reason}") from e |
Binary file removed
BIN
-12.9 MB
api/python/src/cellxgene_ontology_guide/artifacts/all_ontology.json.gz
Binary file not shown.
36 changes: 0 additions & 36 deletions
36
api/python/src/cellxgene_ontology_guide/artifacts/ontology_info.yml
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import os | ||
|
||
PACKAGE_ROOT = os.path.dirname(os.path.realpath(__file__)) | ||
ARTIFACT_DIR = os.path.join(PACKAGE_ROOT, "artifacts") | ||
ALL_ONTOLOGY_FILENAME = "all_ontology.json.gz" | ||
ONTOLOGY_INFO_FILENAME = "ontology_info.yml" | ||
CURRENT_SCHEMA_VERSION = "5.0.0" | ||
ONTOLOGY_INFO_FILENAME = "ontology_info.json" | ||
ONTOLOGY_ASSET_RELEASE_URL = "https://github.com/chanzuckerberg/cellxgene-ontology-guide/releases/download" | ||
SCHEMA_VERSION_TO_ONTOLOGY_ASSET_TAG = {"5.0.0": "ontology-assets-v0.0.1"} |
38 changes: 26 additions & 12 deletions
38
api/python/src/cellxgene_ontology_guide/ontology_parser.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,72 @@ | ||
import os | ||
from unittest.mock import Mock, patch | ||
from urllib.error import HTTPError, URLError | ||
|
||
import pytest | ||
from cellxgene_ontology_guide.artifact_download import load_artifact_by_schema | ||
from cellxgene_ontology_guide.constants import ARTIFACT_DIR, CURRENT_SCHEMA_VERSION | ||
from cellxgene_ontology_guide.constants import ALL_ONTOLOGY_FILENAME, ONTOLOGY_ASSET_RELEASE_URL | ||
|
||
|
||
def test_load_artifact_by_schema(): | ||
assert load_artifact_by_schema(CURRENT_SCHEMA_VERSION, "ontology_info.yml") == os.path.join( | ||
ARTIFACT_DIR, "ontology_info.yml" | ||
) | ||
assert load_artifact_by_schema(CURRENT_SCHEMA_VERSION, "all_ontology.json.gz") == os.path.join( | ||
ARTIFACT_DIR, "all_ontology.json.gz" | ||
) | ||
@pytest.fixture | ||
def mock_urlopen(): | ||
"""A fixture that mocks urlopen and simulates a successful response.""" | ||
|
||
def get_mock_response(url): | ||
if url.endswith(ALL_ONTOLOGY_FILENAME): | ||
mock_response = Mock() | ||
mock_response.__enter__ = Mock(return_value=mock_response) | ||
mock_response.__exit__ = Mock(return_value=None) | ||
mock_response.read.return_value = b'{"key": "value"}' | ||
mock_response.status = 200 | ||
return mock_response | ||
else: | ||
raise HTTPError(url, 404, "Not Found", hdrs=None, fp=None) | ||
|
||
def test_load_artifact_by_schema_raises_value_error(): | ||
with pytest.raises(ValueError): | ||
load_artifact_by_schema("0.0.0", "ontology_info.yml") | ||
with patch("cellxgene_ontology_guide.artifact_download.urlopen", side_effect=get_mock_response) as mock: | ||
yield mock | ||
|
||
|
||
@pytest.fixture | ||
def mock_urlopen_url_error(): | ||
"""A fixture that mocks urlopen and simulates a URLError.""" | ||
with patch( | ||
"cellxgene_ontology_guide.artifact_download.urlopen", side_effect=URLError(reason="Network Unreachable") | ||
) as mock: | ||
yield mock | ||
|
||
|
||
def test_load_artifact_by_schema__success(mock_urlopen): | ||
schema_version = "5.0.0" | ||
expected_tag = "ontology-assets-v0.0.1" | ||
expected_resp_content = b'{"key": "value"}' | ||
|
||
result = load_artifact_by_schema(schema_version, ALL_ONTOLOGY_FILENAME) | ||
expected_download_url = f"{ONTOLOGY_ASSET_RELEASE_URL}/{expected_tag}/{ALL_ONTOLOGY_FILENAME}" | ||
|
||
mock_urlopen.assert_called_once_with(expected_download_url) | ||
assert result == expected_resp_content | ||
|
||
|
||
def test_load_artifact_by_schema__unsupported_schema_version(mock_urlopen): | ||
schema_version = "v0.0.0" | ||
with pytest.raises(ValueError) as exc_info: | ||
load_artifact_by_schema(schema_version, ALL_ONTOLOGY_FILENAME) | ||
assert "Schema version v0.0.0 is not supported in this package version." in str(exc_info.value) | ||
mock_urlopen.assert_not_called() | ||
|
||
|
||
def test_load_artifact_by_schema__http_error(mock_urlopen): | ||
schema_version = "5.0.0" | ||
filename = "missing.json" | ||
with pytest.raises(ValueError) as exc_info: | ||
load_artifact_by_schema(schema_version, filename) | ||
assert "Could not get missing.json for schema version 5.0.0 in GitHub Release Assets" in str(exc_info.value) | ||
mock_urlopen.assert_called_once() | ||
|
||
|
||
def test_load_artifact_by_schema__url_error(mock_urlopen_url_error): | ||
schema_version = "5.0.0" | ||
filename = "all_ontology.json.gz" | ||
with pytest.raises(ValueError) as exc_info: | ||
load_artifact_by_schema(schema_version, filename) | ||
assert "URL error occurred: Network Unreachable" in str(exc_info.value) | ||
mock_urlopen_url_error.assert_called_once() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{ | ||
"CL": { | ||
"version": "v2024-01-04", | ||
"source": "https://github.com/obophenotype/cell-ontology/releases/download", | ||
"filetype": "owl" | ||
}, | ||
"EFO": { | ||
"version": "v3.62.0", | ||
"source": "https://github.com/EBISPOT/efo/releases/download", | ||
"filetype": "owl" | ||
}, | ||
"HANCESTRO": { | ||
"version": "3.0", | ||
"source": "https://github.com/EBISPOT/hancestro/raw", | ||
"filetype": "owl" | ||
}, | ||
"HsapDv": { | ||
"version": "11", | ||
"source": "http://aber-owl.net/media/ontologies/HSAPDV", | ||
"filetype": "owl" | ||
}, | ||
"MONDO": { | ||
"version": "v2024-01-03", | ||
"source": "https://github.com/monarch-initiative/mondo/releases/download", | ||
"filetype": "owl" | ||
}, | ||
"MmusDv": { | ||
"version": "9", | ||
"source": "http://aber-owl.net/media/ontologies/MMUSDV", | ||
"filetype": "owl" | ||
}, | ||
"NCBITaxon": { | ||
"version": "v2023-06-20", | ||
"source": "https://github.com/obophenotype/ncbitaxon/releases/download", | ||
"filetype": "owl.gz" | ||
}, | ||
"UBERON": { | ||
"version": "v2024-01-18", | ||
"source": "https://github.com/obophenotype/uberon/releases/download", | ||
"filetype": "owl" | ||
}, | ||
"PATO": { | ||
"version": "v2023-05-18", | ||
"source": "https://github.com/pato-ontology/pato/raw", | ||
"filetype": "owl" | ||
} | ||
} |
Oops, something went wrong.