Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 26 additions & 102 deletions data/tmrna/example.tsv

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,44 @@ build-backend = "poetry.core.masonry.api"
xfail_strict = true
filterwarnings = "ignore::DeprecationWarning"
markers = [
"ci: Tests that should run in continuous integration",
"slow: Tests that take a long time",
"db: Test that require access to our database",
"network: Tests that require network",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This change adds a `network` marker to pyproject.toml for tests that require network access. That is useful for categorizing tests, but make sure that every test that actually uses the network carries the marker, and that tests which do not need the network are not marked unnecessarily.

"text_mining: tests relating to text mining",
"cli: Test for command line interface",
"cpat: Tests for cpat",
"utils: utility tests",
# Database-specific markers
"crs: tests for crs",
"crw: tests for crw",
"dfam: tests for Dfam",
"ena: tests for ena",
"ensembl: tests for ensembl",
"epmc: tests for europepmc",
"expressionatlas: tests for Expression Atlas",
"genecards: tests for genecards",
"genecards_suite: tests for GeneCards Suite",
"generic: tests for generic json parser",
"gtrnadb: tests for gtrnadb",
"hgnc: tests for hgnc",
"intact: tests for intact",
"ncbi: tests for ncbi",
"ols: tests for ols",
"pdb: tests for pdb",
"pirbase: tests for pirbase",
"psi_mi: tests for psi_mi",
"psicquic: tests for psicquic",
"quickgo: tests for quickgo",
"refseq: tests for refseq",
"rfam: tests for rfam",
"rgd: tests for rgd",
"ribovision: tests for ribovision",
"r2dt: Tests for r2dt related functions",
"sequence_ontology: tests for the sequence ontology",
"so: tests for the sequence ontology",
"silva: tests for silva",
"tarbase: tests for tarbase",
"tmrna: tests for tmRNA",
"trna: Trna related tests",
]
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ markers =

crs: tests for crs
crw: tests for crw
dfam: tests for Dfam
ena: tests for ena
ensembl: tests for ensembl
epmc: tests for europepmc
expressionatlas: tests for Expression Atlas
genecards: tests for genecards
genecards_suite: tests for GeneCards Suite
generic: tests for generic json parser
gtrnadb: tests for gtrnadb
hgnc: tests for hgnc
Expand All @@ -39,7 +42,9 @@ markers =
rgd: tests for rgd
ribovision: tests for ribovision
r2dt: Tests for r2dt related functions
sequence_ontology: tests for the sequence ontology
so: tests for the sequence ontology
silva: tests for silva
tarbase: tests for tarbase
tmrna: tests for tmRNA
trna: Trna related tests
75 changes: 71 additions & 4 deletions tests/databases/ensembl/metadata/karyotypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""
import asyncio
import pytest
from unittest.mock import Mock, patch

from rnacentral_pipeline.databases.ensembl.metadata import karyotypes as karyo

Expand All @@ -22,9 +23,76 @@ def karyotype(domain, species):
raw = asyncio.run(karyo.fetch(species, domain))
return karyo.process(raw)


# Mock data for Ensembl API responses
# Canned Ensembl-style payload for homo_sapiens: MT, Y and 192 filler
# chromosomes, i.e. 194 top-level regions in total.
HOMO_SAPIENS_RESPONSE = {
    "default_coord_system_version": "GRCh38",
    "top_level_region": [
        # Mitochondrial chromosome: has no "bands" entry at all.
        {"coord_system": "chromosome", "length": 16569, "name": "MT"},
        # Y chromosome: the only region here that carries cytogenetic bands.
        {
            "coord_system": "chromosome",
            "name": "Y",
            "length": 57227415,
            "bands": [
                {"stain": "acen", "id": "p11.1", "start": 10300001, "end": 10400000},
                {"id": "p11.2", "stain": "gneg", "start": 600001, "end": 10300000},
                {"id": "p11.31", "stain": "gpos50", "start": 300001, "end": 600000},
                {"stain": "gneg", "id": "p11.32", "start": 1, "end": 300000},
                {"start": 10400001, "end": 10600000, "id": "q11.1", "stain": "acen"},
                {"stain": "gneg", "start": 10600001, "end": 12400000, "id": "q11.21"},
                {"id": "q11.221", "end": 17100000, "start": 12400001, "stain": "gpos50"},
                {"end": 19600000, "start": 17100001, "id": "q11.222", "stain": "gneg"},
                {"stain": "gpos50", "id": "q11.223", "end": 23800000, "start": 19600001},
                {"stain": "gneg", "id": "q11.23", "end": 26600000, "start": 23800001},
                {"end": 57227415, "start": 26600001, "stain": "gvar", "id": "q12"},
            ],
        },
        # Filler chromosomes named "1" through "192"; only MT and Y carry
        # meaningful data, the rest exist to make up the region count.
        *(
            {"coord_system": "chromosome", "name": str(number), "length": 1000000}
            for number in range(1, 193)
        ),
    ],
}

# Canned Ensembl-style payload for glycine_max: 1190 top-level regions,
# none of which has a "bands" entry.
GLYCINE_MAX_RESPONSE = {
    "default_coord_system_version": "Glycine_max_v2.1",
    "top_level_region": [
        # The single region that is given a specific length.
        {"coord_system": "chromosome", "name": "1", "length": 56831624},
        # Filler regions named "2" through "1190" make up the region count.
        *(
            {"coord_system": "chromosome", "name": str(number), "length": 1000000}
            for number in range(2, 1191)
        ),
    ],
}


@pytest.fixture
def mock_ensembl_api():
    """Replace ``requests.get`` in the karyotypes module with canned replies.

    While the fixture is active, a URL containing ``homo_sapiens`` or
    ``glycine_max`` yields a stub response whose ``json()`` returns the
    matching module-level payload. Any other URL raises ``ValueError`` so an
    unexpected request fails loudly.
    """

    def fake_get(url, **kwargs):
        # Checked in order: the first species name found in the URL wins.
        canned = (
            ("homo_sapiens", HOMO_SAPIENS_RESPONSE),
            ("glycine_max", GLYCINE_MAX_RESPONSE),
        )
        for species, payload in canned:
            if species in url:
                response = Mock()
                # Explicit no-op stand-in for the HTTP status check.
                response.raise_for_status = Mock()
                response.json.return_value = payload
                return response
        raise ValueError(f"Unexpected URL in test: {url}")

    target = "rnacentral_pipeline.databases.ensembl.metadata.karyotypes.requests.get"
    with patch(target, side_effect=fake_get):
        yield


@pytest.mark.ensembl
@pytest.mark.network
def test_builds_empty_karyotype_for_missing_data():
def test_builds_empty_karyotype_for_missing_data(mock_ensembl_api):
_, found = karyotype("ensembl", "glycine_max")
assert len(found) == 1190
assert found["1"] == {
Expand All @@ -38,8 +106,7 @@ def test_builds_empty_karyotype_for_missing_data():
}

@pytest.mark.ensembl
@pytest.mark.network
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The @pytest.mark.network marker is no longer needed here, as the test has been refactored to use mocking and no longer depends on a network connection. Removing this marker will accurately reflect the test's independence from network resources.

Suggested change
@pytest.mark.network
def test_builds_with_known_bands():

def test_builds_with_known_bands():
def test_builds_with_known_bands(mock_ensembl_api):
_, found = karyotype("ensembl", "homo_sapiens")
assert len(found) == 194
assert found["MT"] == {
Expand Down
Loading