Skip to content

Commit

Permalink
Update
Browse files: browse the repository at this point in the history
  • Loading branch information
cthoyt committed Nov 2, 2023
1 parent b6ed42b commit 0004f34
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 41 deletions.
4 changes: 3 additions & 1 deletion src/bioregistry/data/bioregistry.json
Original file line number Diff line number Diff line change
Expand Up @@ -33721,7 +33721,6 @@
"name": "Drosophila Phenotype Ontology",
"prefix": "FBcv"
},
"part_of": "flybase",
"pattern": "^\\d{7}$",
"prefixcommons": {
"bioportal": "1017",
Expand Down Expand Up @@ -38805,6 +38804,9 @@
"Phenomics",
"Comparative Genomics",
"Omics"
],
"synonyms": [
"GEO"
]
},
"go": {
Expand Down
7 changes: 6 additions & 1 deletion src/bioregistry/data/contexts.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
{
"obo": {
"blacklist": [
"icd9"
"icd9",
"orphanet",
"gro"
],
"custom_prefix_map": {
"PMID": "https://www.ncbi.nlm.nih.gov/pubmed/"
Expand Down Expand Up @@ -29,8 +31,11 @@
"default"
],
"prefix_remapping": {
"GEO": "ncbi.geo",
"ensembl": "ENSEMBL",
"geogeo": "GEO",
"icd10": "ICD10WHO",
"cpga": "GRO",
"orphanet.ordo": "Orphanet",
"pubmed": "PMID",
"snomedct": "SCTID",
Expand Down
18 changes: 7 additions & 11 deletions src/bioregistry/record_accumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def get_converter(
converter = curies.remap_curie_prefixes(converter, remapping)
if rewiring:
converter = curies.rewire(converter, rewiring)
converter = _enrich_converter_synonyms(converter)
return converter


Expand All @@ -159,13 +160,6 @@ def _get_records( # noqa: C901
resource.prefix: resource.get_priority_prefix(priority=prefix_priority)
for resource in resource_dict.values()
}
dd = defaultdict(list)
for k, v in primary_prefixes.items():
dd[v.lower()].append(k)
dd = {k: v for k, v in dd.items() if len(v) > 1}
if dd:
raise ValueError(f"Duplicate prefixes: {dd}")

pattern_map = {
prefix: pattern
for prefix in primary_prefixes
Expand Down Expand Up @@ -338,20 +332,22 @@ def _add_prefix_prefixes(
primary_uri_prefix = primary_uri_prefixes[prefix]
if not primary_prefix or not primary_uri_prefix:
continue
record = curies.Record(
records[prefix] = curies.Record(
prefix=primary_prefix,
prefix_synonyms=sorted(secondary_prefixes[prefix] - {primary_prefix}),
uri_prefix=primary_uri_prefix,
uri_prefix_synonyms=sorted(secondary_uri_prefixes[prefix] - {primary_uri_prefix}),
pattern=pattern_map.get(prefix),
)
record = _enrich_synonyms(record)
records[prefix] = record

return [record for _, record in sorted(records.items())]


def _enrich_synonyms(record: curies.Record) -> curies.Record:
def _enrich_converter_synonyms(converter: Converter) -> Converter:
return Converter([_enrich_record_synonyms(r) for r in converter.records])


def _enrich_record_synonyms(record: curies.Record) -> curies.Record:
sss = set()
for s in [record.prefix, *record.prefix_synonyms]:
sss.update(_generate_variants(s))
Expand Down
32 changes: 4 additions & 28 deletions tests/test_contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

import bioregistry
import curies
from bioregistry import Resource, manager
from bioregistry.constants import CONTEXTS_PATH

Expand Down Expand Up @@ -46,6 +47,8 @@ def test_obo_context(self):
self.assertEqual(f"{p}/KISAO_", prefix_map["KISAO"])
self.assertIn("FBcv", prefix_map)
self.assertEqual(f"{p}/FBcv_", prefix_map["FBcv"])
self.assertNotIn("geo", prefix_map)
self.assertIn("ncbi.geo", prefix_map)
self.assertIn("GEO", prefix_map)
self.assertEqual(f"{p}/GEO_", prefix_map["GEO"])
self.assertEqual("https://www.ncbi.nlm.nih.gov/pubmed/", prefix_map["PMID"])
Expand Down Expand Up @@ -102,36 +105,9 @@ def test_data(self):
self.valid_metaprefixes.union({"obofoundry.preferred", "preferred", "default"}),
)
remapping = context.prefix_remapping or {}
_valid_remapping_prefixes = set(
bioregistry.get_prefix_map(
uri_prefix_priority=context.uri_prefix_priority,
)
)
_valid_remapping_prefixes = set(manager.converter.prefix_map)
for prefix in remapping:
# Currently this allows overwriting of existing prefixes
self.assertIn(prefix, _valid_remapping_prefixes)

_valid_custom_prefixes = set(
bioregistry.get_prefix_map(
remapping=remapping,
uri_prefix_priority=context.uri_prefix_priority,
)
)
invalid_custom_prefixes = {
prefix
for prefix in (context.custom_prefix_map or {})
if prefix not in _valid_custom_prefixes
}
self.assertEqual(
0,
len(invalid_custom_prefixes),
msg=f"""
All prefixes in the custom prefix mapping should either be canonical prefixes or generated by the prefix remapping
Invalid prefixes: {", ".join(sorted(invalid_custom_prefixes))}
""",
)

for blacklist_prefix in context.blacklist or []:
self.assertIn(blacklist_prefix, self.valid_prefixes)

0 comments on commit 0004f34

Please sign in to comment.