Skip to content

Commit

Permalink
Merge branch 'master' into subclass_of
Browse files Browse the repository at this point in the history
  • Loading branch information
sierra-moxon committed Dec 13, 2023
2 parents 90a9de3 + ca5c1ea commit 60fad68
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 34 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pr-verify-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ jobs:
poetry run codespell
poetry run yamllint -c .yamllint-config biolink-model.yaml
poetry run yamllint -c .yamllint-config infores_catalog.yaml
make validate_infores
Expand Down
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,14 @@ gen-examples:
cp src/data/examples/* $(EXAMPLEDIR)

infores:
poetry run gen-python information-resource.yaml > information_resource.py
$(RUN) gen-python information-resource.yaml > information_resource.py

validate_infores:
$(RUN) python src/biolink_model/scripts/verify_infores.py

id-prefixes:
poetry run gen-python class_prefixes.yaml > src/biolink_model/scripts/classprefixes.py
cd src/biolink_model/scripts/ && poetry run python id_prefixes.py
$(RUN) gen-python class_prefixes.yaml > src/biolink_model/scripts/classprefixes.py
cd src/biolink_model/scripts/ && $(RUN) python id_prefixes.py

spell:
poetry run codespell
Expand Down Expand Up @@ -127,8 +130,8 @@ gen-project: $(PYMODEL)
-d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
mv $(DEST)/prefixmap/biolink_model.yaml $(DEST)/prefixmap/biolink_model_prefix_map.json
mv $(PYMODEL)/biolink*.py $(PYMODEL)/model.py
$(RUN) gen-pydantic --pydantic_version 1 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel.py
$(RUN) gen-pydantic --pydantic_version 2 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel_v2.py
$(RUN) gen-pydantic --pydantic-version 1 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel.py
$(RUN) gen-pydantic --pydantic-version 2 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel_v2.py
cp biolink-model.yaml src/biolink_model/schema/biolink_model.yaml
$(MAKE) id-prefixes infores

Expand Down
25 changes: 15 additions & 10 deletions infores_catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ information_resources:
- id: infores:biocatalogue
status: deprecated
name: BioCatalogue
knowledge level: other
agent type: not_provided
- id: infores:biogrid
status: released
name: 'The Biological General Repository for Interaction Datasets '
Expand All @@ -478,7 +480,7 @@ information_resources:
status: released
name: BioLink API
xref:
- https://api.monarchinitiative.org/api/
- http://api-v3.monarchinitiative.org/
knowledge level: curated
agent type: not_provided
description: API integration layer for linked biological objects.
Expand Down Expand Up @@ -1090,7 +1092,7 @@ information_resources:
name: Connections Hypothesis Provider API
xref:
- https://github.com/NCATSTranslator/Translator-All/wiki/Connections-Hypothesis-Provider
knowledge level: prediction
knowledge level: predicted
agent type: not_provided
description: A Translator Reasoner API for the Connections Hypothesis Provider
- id: infores:cord19
Expand Down Expand Up @@ -1224,14 +1226,14 @@ information_resources:
- id: infores:dili-network-study-data
status: released
name: Drug-Induced Liver Injury Network (DILIN) Participant Data
knowledge level: correlated
knowledge level: correlation
agent type: not_provided
- id: infores:faers
status: released
name: FDA Adverse Event Reporting System
xref:
- https://github.com/NCATSTranslator/Translator-All/wiki/FAERS
knowledge level: observation
knowledge level: observed
agent type: not_provided
description: >-
The FDA Adverse Event Reporting System (FAERS) is a database that contains information
Expand Down Expand Up @@ -1337,7 +1339,7 @@ information_resources:
synonym:
- drugmechdb
knowledge level: curated
agent type: not provided
agent type: not_provided
description: >-
A database of paths that represent the mechanism of action from a drug to a disease in an indication.
- id: infores:ebi
Expand Down Expand Up @@ -1633,6 +1635,8 @@ information_resources:
name: Human Gene Ontology Annotations
xref:
- https://github.com/NCATSTranslator/Translator-All/wiki/Human-GOA
knowledge level: curated
agent type: not_provided
- id: infores:goa
status: released
name: Gene Ontology Annotations
Expand Down Expand Up @@ -1943,8 +1947,8 @@ information_resources:
name: KINOMEscan
xref:
- https://lincs.hms.harvard.edu/kinomescan/
knowledge level: raw_data
agent type: experimental
knowledge level: other
agent type: not_provided
- id: infores:knowledge-collaboratory
status: released
name: Translator Knowledge Collaboratory API
Expand Down Expand Up @@ -2155,7 +2159,7 @@ information_resources:
name: SciGraph-Monarch-Ontology
xref:
- scigraph-ontology.monarchinitiative.org/scigraph/docs/
knowledge level: not_provided
knowledge level: other
agent type: not_provided
- id: infores:monarchinitiative
status: released
Expand Down Expand Up @@ -2580,7 +2584,7 @@ information_resources:
name: Psychoactive Drug Screening Program
xref:
- https://github.com/NCATSTranslator/Translator-All/wiki/PDSP
knowledge level: not_provided
knowledge level: other
agent type: not_provided
description: >-
This service provides screening of novel psychoactive compounds for pharmacological and
Expand Down Expand Up @@ -2886,7 +2890,7 @@ information_resources:
name: SnpEff
xref:
- https://pcingola.github.io/SnpEff/
knowledge level: prediction
knowledge level: predicted
agent type: computational_model
description: >-
Genetic variant annotation and functional effect prediction toolbox.
Expand Down Expand Up @@ -3433,6 +3437,7 @@ information_resources:
xref:
- https://github.com/NCATSTranslator/Translator-All/wiki/SRI-Answer-Appraiser
knowledge level: curated
agent type: not_provided
- id: infores:sri-clinical-evidence-score
status: released
name: SRI Clinical Evidence Score
Expand Down
64 changes: 60 additions & 4 deletions information-resource.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
---
id: https://w3id.org/biolink/biolink-model
name: Biolink-Model
description: Entity and association taxonomy and datamodel for life-sciences data
id: https://w3id.org/biolink/biolink-model/infores
name: Biolink-Model-Information-Resource
license: https://creativecommons.org/publicdomain/zero/1.0/

# Version should be kept in sync with primary Git repository release tag
Expand Down Expand Up @@ -139,12 +138,69 @@ slots:

description:
knowledge level:
range: KnowledgeLevelEnum
description: >-
The level of knowledge that supports an edge or node. This is a general
categorization of the type of evidence that supports a statement, and is
not intended to be a comprehensive description of the evidence. For
example, a statement may be supported by a single publication, but that
publication may contain multiple types of evidence, such as a
computational prediction and a manual curation. In this case, the
knowledge level would be "curated", and the evidence would be described
in more detail in the evidence graph.
agent type:
range: AgentTypeEnum
description: >-
The type of agent that supports an edge or node. This is a general
categorization of the type of agent that supports a statement, and is
not intended to be a comprehensive description of the agent. For
example, a statement may be supported by a single publication, but that
publication may contain multiple types of evidence, such as a
computational prediction and a manual curation. In this case, the
agent type would be "publication", and the evidence would be described
in more detail in the evidence graph.
enums:
InformationResourceStatusEnum:
permissible_values:
released:
deprecated:
draft:
modified:
modified:

KnowledgeLevelEnum:
permissible_values:
curated:
description: >-
knowledge generated through manual curation or interpretation of data or published study results
predicted:
description: >-
predictions generated computationally through inference over less direct forms of evidence (without human
intervention or review)
text_mined:
description: >-
knowledge extracted from published text by NLP agents (without human intervention or review)
correlation:
description: >-
statistical correlations calculated between variables in a clinical or omics dataset, by an automated
analysis pipeline
observed:
description: >-
edge reports a phenomenon that was reported/observed to have occurred (and possibly some quantification,
e.g. how many times, at what frequency)
other:
description: >-
knowledge level may not fit into the categories above, or is not provided/known
mixed:
description: >-
used for sources that might provide edges with different knowledge levels, e.g. correlations in addition to
curated Edges - set tag to Curated, unless predicate rules override
AgentTypeEnum:
permissible_values:
not_provided:
description: >-
agent type is not provided or known
computational_model:
description: >-
a computational model, such as a machine learning model
62 changes: 50 additions & 12 deletions information_resource.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Auto generated from information-resource.yaml by pythongen.py version: 0.0.1
# Generation date: 2023-11-30T15:26:10
# Schema: Biolink-Model
# Generation date: 2023-12-12T15:54:13
# Schema: Biolink-Model-Information-Resource
#
# id: https://w3id.org/biolink/biolink-model
# description: Entity and association taxonomy and datamodel for life-sciences data
# id: https://w3id.org/biolink/biolink-model/infores
# description:
# license: https://creativecommons.org/publicdomain/zero/1.0/

import dataclasses
Expand Down Expand Up @@ -100,8 +100,8 @@ class InformationResource(YAMLRoot):
xref: Optional[Union[str, List[str]]] = empty_list()
synonym: Optional[Union[str, List[str]]] = empty_list()
description: Optional[str] = None
knowledge_level: Optional[str] = None
agent_type: Optional[str] = None
knowledge_level: Optional[Union[str, "KnowledgeLevelEnum"]] = None
agent_type: Optional[Union[str, "AgentTypeEnum"]] = None

def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self._is_empty(self.id):
Expand All @@ -126,11 +126,11 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self.description is not None and not isinstance(self.description, str):
self.description = str(self.description)

if self.knowledge_level is not None and not isinstance(self.knowledge_level, str):
self.knowledge_level = str(self.knowledge_level)
if self.knowledge_level is not None and not isinstance(self.knowledge_level, KnowledgeLevelEnum):
self.knowledge_level = KnowledgeLevelEnum(self.knowledge_level)

if self.agent_type is not None and not isinstance(self.agent_type, str):
self.agent_type = str(self.agent_type)
if self.agent_type is not None and not isinstance(self.agent_type, AgentTypeEnum):
self.agent_type = AgentTypeEnum(self.agent_type)

super().__post_init__(**kwargs)

Expand All @@ -147,6 +147,44 @@ class InformationResourceStatusEnum(EnumDefinitionImpl):
name="InformationResourceStatusEnum",
)

class KnowledgeLevelEnum(EnumDefinitionImpl):

curated = PermissibleValue(
text="curated",
description="""knowledge generated through manual curation or interpretation of data or published study results""")
predicted = PermissibleValue(
text="predicted",
description="""predictions generated computationally through inference over less direct forms of evidence (without human intervention or review)""")
text_mined = PermissibleValue(
text="text_mined",
description="knowledge extracted from published text by NLP agents (without human intervention or review)")
correlation = PermissibleValue(
text="correlation",
description="""statistical correlations calculated between variables in a clinical or omics dataset, by an automated analysis pipeline""")
observed = PermissibleValue(
text="observed",
description="""edge reports a phenomenon that was reported/observed to have occurred (and possibly some quantification, e.g. how many times, at what frequency)""")
other = PermissibleValue(
text="other",
description="knowledge level may not fit into the categories above, or is not provided/known")
mixed = PermissibleValue(
text="mixed",
description="""used for sources that might provide edges with different knowledge levels, e.g.correlations in addition to curated Edges - set tag to Curated, unless predicate rules override""")

_defn = EnumDefinition(
name="KnowledgeLevelEnum",
)

class AgentTypeEnum(EnumDefinitionImpl):

not_provided = PermissibleValue(
text="not_provided",
description="agent type is not provided or known")

_defn = EnumDefinition(
name="AgentTypeEnum",
)

# Slots
class slots:
pass
Expand All @@ -173,7 +211,7 @@ class slots:
model_uri=INFORES.description, domain=None, range=Optional[str])

slots.knowledge_level = Slot(uri=INFORES.knowledge_level, name="knowledge level", curie=INFORES.curie('knowledge_level'),
model_uri=INFORES.knowledge_level, domain=None, range=Optional[str])
model_uri=INFORES.knowledge_level, domain=None, range=Optional[Union[str, "KnowledgeLevelEnum"]])

slots.agent_type = Slot(uri=INFORES.agent_type, name="agent type", curie=INFORES.curie('agent_type'),
model_uri=INFORES.agent_type, domain=None, range=Optional[str])
model_uri=INFORES.agent_type, domain=None, range=Optional[Union[str, "AgentTypeEnum"]])
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions src/biolink_model/scripts/verify_infores.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,25 @@ def validate(self):
for infores in data.get('information_resources'):
# exceptions for resolvable URLs that don't return 200 response for some reason (e.g. require
# user to accept a popup before resolving):
if infores.get("status") == "deprecated":
continue
if infores.get("knowledge level") not in ["curated",
"predicted",
"text_mined",
"correlation",
"observed",
"other",
"mixed"]:
print(infores)
print("Invalid infores knowledge level:" + infores.get("knowledge level")
+ " for " + infores.get("name"))
raise ValueError("invalid return code for " + infores.get("name") + " for " + infores.get("id"))

if infores.get("agent type") not in ["not_provided", "computational_model"]:
print(infores)
print("Invalid infores agent type:" + infores.get("agent type") + " for " + infores.get("name"))
raise ValueError("invalid return code for " + infores.get("name") + " for " + infores.get("id"))

if infores.get("id") == 'infores:athena' \
or infores.get("id") == 'infores:isb-wellness' \
or infores.get("id") == 'infores:isb-incov' \
Expand Down

0 comments on commit 60fad68

Please sign in to comment.