Merge branch 'master' into subclass_of

biolink · Dec 13, 2023 · 60fad68 · 60fad68
2 parents 90a9de3 + ca5c1ea
commit 60fad68
Show file tree

Hide file tree

Showing 7 changed files with 156 additions and 34 deletions.
diff --git a/.github/workflows/pr-verify-pull-request.yaml b/.github/workflows/pr-verify-pull-request.yaml
@@ -41,6 +41,7 @@ jobs:
           poetry run codespell
           poetry run yamllint -c .yamllint-config biolink-model.yaml
           poetry run yamllint -c .yamllint-config infores_catalog.yaml
+          make validate_infores
 
 
 

diff --git a/Makefile b/Makefile
@@ -95,11 +95,14 @@ gen-examples:
 	cp src/data/examples/* $(EXAMPLEDIR)
 
 infores:
-	poetry run gen-python information-resource.yaml > information_resource.py
+	$(RUN) gen-python information-resource.yaml > information_resource.py
+
+validate_infores:
+	$(RUN) python src/biolink_model/scripts/verify_infores.py
 
 id-prefixes:
-	poetry run gen-python class_prefixes.yaml > src/biolink_model/scripts/classprefixes.py
-	cd src/biolink_model/scripts/ && poetry run python id_prefixes.py
+	$(RUN) gen-python class_prefixes.yaml > src/biolink_model/scripts/classprefixes.py
+	cd src/biolink_model/scripts/ && $(RUN) python id_prefixes.py
 
 spell:
 	poetry run codespell
@@ -127,8 +130,8 @@ gen-project: $(PYMODEL)
 		-d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)
 	mv $(DEST)/prefixmap/biolink_model.yaml $(DEST)/prefixmap/biolink_model_prefix_map.json
 	mv $(PYMODEL)/biolink*.py $(PYMODEL)/model.py
-	$(RUN) gen-pydantic --pydantic_version 1 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel.py
-	$(RUN) gen-pydantic --pydantic_version 2 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel_v2.py
+	$(RUN) gen-pydantic --pydantic-version 1 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel.py
+	$(RUN) gen-pydantic --pydantic-version 2 src/biolink_model/schema/biolink_model.yaml > $(PYMODEL)/pydanticmodel_v2.py
 	cp biolink-model.yaml src/biolink_model/schema/biolink_model.yaml
 	$(MAKE) id-prefixes infores
 

diff --git a/infores_catalog.yaml b/infores_catalog.yaml
@@ -465,6 +465,8 @@ information_resources:
   - id: infores:biocatalogue
     status: deprecated
     name: BioCatalogue
+    knowledge level: other
+    agent type: not_provided
   - id: infores:biogrid
     status: released
     name: 'The Biological General Repository for Interaction Datasets '
@@ -478,7 +480,7 @@ information_resources:
     status: released
     name: BioLink API
     xref:
-      - https://api.monarchinitiative.org/api/
+      - http://api-v3.monarchinitiative.org/
     knowledge level: curated
     agent type: not_provided
     description: API integration layer for linked biological objects.
@@ -1090,7 +1092,7 @@ information_resources:
     name: Connections Hypothesis Provider API
     xref:
       - https://github.com/NCATSTranslator/Translator-All/wiki/Connections-Hypothesis-Provider
-    knowledge level: prediction
+    knowledge level: predicted
     agent type: not_provided
     description: A Translator Reasoner API for the Connections Hypothesis Provider
   - id: infores:cord19
@@ -1224,14 +1226,14 @@ information_resources:
   - id: infores:dili-network-study-data
     status: released
     name: Drug-Induced Liver Injury Network (DILIN) Participant Data
-    knowledge level: correlated
+    knowledge level: correlation
     agent type: not_provided
   - id: infores:faers
     status: released
     name: FDA Adverse Event Reporting System
     xref:
       - https://github.com/NCATSTranslator/Translator-All/wiki/FAERS
-    knowledge level: observation
+    knowledge level: observed
     agent type: not_provided
     description: >-
       The FDA Adverse Event Reporting System (FAERS) is a database that contains information 
@@ -1337,7 +1339,7 @@ information_resources:
     synonym:
       - drugmechdb
     knowledge level: curated
-    agent type: not provided
+    agent type: not_provided
     description: >-
       A database of paths that represent the mechanism of action from a drug to a disease in an indication.
   - id: infores:ebi
@@ -1633,6 +1635,8 @@ information_resources:
     name: Human Gene Ontology Annotations
     xref:
       - https://github.com/NCATSTranslator/Translator-All/wiki/Human-GOA
+    knowledge level: curated
+    agent type: not_provided
   - id: infores:goa
     status: released
     name: Gene Ontology Annotations
@@ -1943,8 +1947,8 @@ information_resources:
     name: KINOMEscan
     xref:
       - https://lincs.hms.harvard.edu/kinomescan/
-    knowledge level: raw_data
-    agent type: experimental
+    knowledge level: other
+    agent type: not_provided
   - id: infores:knowledge-collaboratory
     status: released
     name: Translator Knowledge Collaboratory API
@@ -2155,7 +2159,7 @@ information_resources:
     name: SciGraph-Monarch-Ontology
     xref:
       - scigraph-ontology.monarchinitiative.org/scigraph/docs/
-    knowledge level: not_provided
+    knowledge level: other
     agent type: not_provided
   - id: infores:monarchinitiative
     status: released
@@ -2580,7 +2584,7 @@ information_resources:
     name: Psychoactive Drug Screening Program
     xref:
       - https://github.com/NCATSTranslator/Translator-All/wiki/PDSP
-    knowledge level: not_provided
+    knowledge level: other
     agent type: not_provided
     description: >-
       This service provides screening of novel psychoactive compounds for pharmacological and 
@@ -2886,7 +2890,7 @@ information_resources:
     name: SnpEff
     xref:
       - https://pcingola.github.io/SnpEff/
-    knowledge level: prediction
+    knowledge level: predicted
     agent type: computational_model
     description: >-
       Genetic variant annotation and functional effect prediction toolbox. 
@@ -3433,6 +3437,7 @@ information_resources:
     xref:
       - https://github.com/NCATSTranslator/Translator-All/wiki/SRI-Answer-Appraiser
     knowledge level: curated
+    agent type: not_provided
   - id: infores:sri-clinical-evidence-score
     status: released
     name: SRI Clinical Evidence Score

diff --git a/information-resource.yaml b/information-resource.yaml
@@ -1,7 +1,6 @@
 ---
-id: https://w3id.org/biolink/biolink-model
-name: Biolink-Model
-description: Entity and association taxonomy and datamodel for life-sciences data
+id: https://w3id.org/biolink/biolink-model/infores
+name: Biolink-Model-Information-Resource
 license: https://creativecommons.org/publicdomain/zero/1.0/
 
 # Version should be kept in sync with primary Git repository release tag
@@ -139,12 +138,69 @@ slots:
 
   description:
   knowledge level:
+    range: KnowledgeLevelEnum
+    description: >-
+        The level of knowledge that supports an edge or node.  This is a general
+        categorization of the type of evidence that supports a statement, and is
+        not intended to be a comprehensive description of the evidence.  For
+        example, a statement may be supported by a single publication, but that
+        publication may contain multiple types of evidence, such as a
+        computational prediction and a manual curation.  In this case, the
+        knowledge level would be "curated", and the evidence would be described
+        in more detail in the evidence graph.
   agent type:
+    range: AgentTypeEnum
+    description: >-
+        The type of agent that supports an edge or node.  This is a general
+        categorization of the type of agent that supports a statement, and is
+        not intended to be a comprehensive description of the agent.  For
+        example, a statement generated by a computational model, such as a
+        machine learning model, would have the agent type
+        "computational_model", and the evidence would be described in more
+        detail in the evidence graph.  When the agent type is not provided or
+        known, the agent type would be "not_provided".
 
 enums:
   InformationResourceStatusEnum:
     permissible_values:
       released:
       deprecated:
       draft:
-      modified:
+      modified:
+
+  KnowledgeLevelEnum:
+    permissible_values:
+      curated:
+        description: >-
+          knowledge generated through manual curation or interpretation of data or published study results
+      predicted:
+        description: >-
+          predictions generated computationally through inference over less direct forms of evidence (without human
+          intervention or review)
+      text_mined:
+        description: >-
+          knowledge extracted from published text by NLP agents (without human intervention or review)
+      correlation:
+        description: >-
+          statistical correlations calculated between variables in a clinical or omics dataset, by an automated
+          analysis pipeline
+      observed:
+        description: >-
+          edge reports a phenomenon that was reported/observed to have occurred (and possibly some quantification,
+          e.g. how many times, at what frequency)
+      other:
+        description: >-
+          knowledge level may not fit into the categories above, or is not provided/known
+      mixed:
+        description: >-
+          used for sources that might provide edges with different knowledge levels, e.g. correlations in addition to
+          curated edges - set tag to curated, unless predicate rules override
+
+  AgentTypeEnum:
+    permissible_values:
+      not_provided:
+        description: >-
+          agent type is not provided or known
+      computational_model:
+        description: >-
+          a computational model, such as a machine learning model
diff --git a/information_resource.py b/information_resource.py
@@ -1,9 +1,9 @@
 # Auto generated from information-resource.yaml by pythongen.py version: 0.0.1
-# Generation date: 2023-11-30T15:26:10
-# Schema: Biolink-Model
+# Generation date: 2023-12-12T15:54:13
+# Schema: Biolink-Model-Information-Resource
 #
-# id: https://w3id.org/biolink/biolink-model
-# description: Entity and association taxonomy and datamodel for life-sciences data
+# id: https://w3id.org/biolink/biolink-model/infores
+# description:
 # license: https://creativecommons.org/publicdomain/zero/1.0/
 
 import dataclasses
@@ -100,8 +100,8 @@ class InformationResource(YAMLRoot):
     xref: Optional[Union[str, List[str]]] = empty_list()
     synonym: Optional[Union[str, List[str]]] = empty_list()
     description: Optional[str] = None
-    knowledge_level: Optional[str] = None
-    agent_type: Optional[str] = None
+    knowledge_level: Optional[Union[str, "KnowledgeLevelEnum"]] = None
+    agent_type: Optional[Union[str, "AgentTypeEnum"]] = None
 
     def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
         if self._is_empty(self.id):
@@ -126,11 +126,11 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
         if self.description is not None and not isinstance(self.description, str):
             self.description = str(self.description)
 
-        if self.knowledge_level is not None and not isinstance(self.knowledge_level, str):
-            self.knowledge_level = str(self.knowledge_level)
+        if self.knowledge_level is not None and not isinstance(self.knowledge_level, KnowledgeLevelEnum):
+            self.knowledge_level = KnowledgeLevelEnum(self.knowledge_level)
 
-        if self.agent_type is not None and not isinstance(self.agent_type, str):
-            self.agent_type = str(self.agent_type)
+        if self.agent_type is not None and not isinstance(self.agent_type, AgentTypeEnum):
+            self.agent_type = AgentTypeEnum(self.agent_type)
 
         super().__post_init__(**kwargs)
 
@@ -147,6 +147,44 @@ class InformationResourceStatusEnum(EnumDefinitionImpl):
         name="InformationResourceStatusEnum",
     )
 
+class KnowledgeLevelEnum(EnumDefinitionImpl):
+
+    curated = PermissibleValue(
+        text="curated",
+        description="""knowledge generated through manual curation  or interpretation of data or published study results""")
+    predicted = PermissibleValue(
+        text="predicted",
+        description="""predictions generated computationally through inference over less direct forms of evidence (without human  intervention or review)""")
+    text_mined = PermissibleValue(
+        text="text_mined",
+        description="knowledge extracted from published text by NLP agents (without human intervention or review)")
+    correlation = PermissibleValue(
+        text="correlation",
+        description="""statistical correlations calculated between variables in a clinical or omics dataset, by an automated  analysis pipeline""")
+    observed = PermissibleValue(
+        text="observed",
+        description="""edge reports a phenomenon that was reported/observed to have occurred (and possibly some quantification,  e.g. how many times, at what frequency)""")
+    other = PermissibleValue(
+        text="other",
+        description="knowledge level may not fit into the categories above, or is not provided/known")
+    mixed = PermissibleValue(
+        text="mixed",
+        description="""used for sources that might provide edges with different knowledge levels, e.g.correlations in addition to  curated Edges - set tag to Curated, unless predicate rules override""")
+
+    _defn = EnumDefinition(
+        name="KnowledgeLevelEnum",
+    )
+
+class AgentTypeEnum(EnumDefinitionImpl):
+
+    not_provided = PermissibleValue(
+        text="not_provided",
+        description="agent type is not provided or known")
+
+    _defn = EnumDefinition(
+        name="AgentTypeEnum",
+    )
+
 # Slots
 class slots:
     pass
@@ -173,7 +211,7 @@ class slots:
                    model_uri=INFORES.description, domain=None, range=Optional[str])
 
 slots.knowledge_level = Slot(uri=INFORES.knowledge_level, name="knowledge level", curie=INFORES.curie('knowledge_level'),
-                   model_uri=INFORES.knowledge_level, domain=None, range=Optional[str])
+                   model_uri=INFORES.knowledge_level, domain=None, range=Optional[Union[str, "KnowledgeLevelEnum"]])
 
 slots.agent_type = Slot(uri=INFORES.agent_type, name="agent type", curie=INFORES.curie('agent_type'),
-                   model_uri=INFORES.agent_type, domain=None, range=Optional[str])
+                   model_uri=INFORES.agent_type, domain=None, range=Optional[Union[str, "AgentTypeEnum"]])
diff --git a/poetry.lock b/poetry.lock
diff --git a/src/biolink_model/scripts/verify_infores.py b/src/biolink_model/scripts/verify_infores.py
@@ -52,6 +52,25 @@ def validate(self):
             for infores in data.get('information_resources'):
                 # exceptions for resolvable URLs that don't return 200 response for some reason (e.g. require
                 # user to accept a popup before resolving):
+                if infores.get("status") == "deprecated":
+                    continue
+                if infores.get("knowledge level") not in ["curated",
+                                                          "predicted",
+                                                          "text_mined",
+                                                          "correlation",
+                                                          "observed",
+                                                          "other",
+                                                          "mixed"]:
+                    print(infores)
+                    print("Invalid infores knowledge level:" + infores.get("knowledge level")
+                          + " for " + infores.get("name"))
+                    raise ValueError("invalid return code for " + infores.get("name") + " for " + infores.get("id"))
+
+                if infores.get("agent type") not in ["not_provided", "computational_model"]:
+                    print(infores)
+                    print("Invalid infores agent type:" + infores.get("agent type") + " for " + infores.get("name"))
+                    raise ValueError("invalid return code for " + infores.get("name") + " for " + infores.get("id"))
+
                 if infores.get("id") == 'infores:athena' \
                         or infores.get("id") == 'infores:isb-wellness' \
                         or infores.get("id") == 'infores:isb-incov' \