Add constants + I/O for new conditions/experiments tables

* constants * read/write experiment table * add experiments table to Problem, and populate from yaml * some first validation tasks To be complemented by separate pull requests.
PEtab-dev · Dec 5, 2024 · 7dd3e41 · 7dd3e41
1 parent d3e4006
commit 7dd3e41
Show file tree

Hide file tree

Showing 11 changed files with 338 additions and 16 deletions.
diff --git a/doc/modules.rst b/doc/modules.rst
@@ -30,5 +30,8 @@ API Reference
    petab.v1.yaml
    petab.v2
    petab.v2.C
+   petab.v2.experiments
    petab.v2.lint
+   petab.v2.models
    petab.v2.problem
+   petab.v2.petab1to2
diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml
@@ -76,6 +76,10 @@ properties:
           description: List of PEtab condition files.
           $ref: "#/definitions/list_of_files"
 
+        experiment_files:
+          description: List of PEtab experiment files.
+          $ref: "#/definitions/list_of_files"
+
         observable_files:
           description: List of PEtab observable files.
           $ref: "#/definitions/list_of_files"
@@ -92,7 +96,6 @@ properties:
         - model_files
         - observable_files
         - measurement_files
-        - condition_files
 
   extensions:
     type: object

diff --git a/petab/v2/C.py b/petab/v2/C.py
@@ -10,9 +10,14 @@
 #: Observable ID column in the observable and measurement tables
 OBSERVABLE_ID = "observableId"
 
+#: Experiment ID column in the measurement table
+EXPERIMENT_ID = "experimentId"
+
+# TODO: remove
 #: Preequilibration condition ID column in the measurement table
 PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
 
+# TODO: remove
 #: Simulation condition ID column in the measurement table
 SIMULATION_CONDITION_ID = "simulationConditionId"
 
@@ -40,13 +45,16 @@
 #: Mandatory columns of measurement table
 MEASUREMENT_DF_REQUIRED_COLS = [
     OBSERVABLE_ID,
+    # TODO: add
+    # EXPERIMENT_ID,
     SIMULATION_CONDITION_ID,
     MEASUREMENT,
     TIME,
 ]
 
 #: Optional columns of measurement table
 MEASUREMENT_DF_OPTIONAL_COLS = [
+    # TODO: remove
     PREEQUILIBRATION_CONDITION_ID,
     OBSERVABLE_PARAMETERS,
     NOISE_PARAMETERS,
@@ -125,9 +133,45 @@
 
 #: Condition ID column in the condition table
 CONDITION_ID = "conditionId"
+# TODO: removed?
 #: Condition name column in the condition table
 CONDITION_NAME = "conditionName"
 
+#: Column in the condition table with the ID of an entity that is changed
+TARGET_ID = "targetId"
+#: Column in the condition table with the type of value that is changed
+VALUE_TYPE = "valueType"
+#: Column in the condition table with the new value of the target entity
+TARGET_VALUE = "targetValue"
+# value types:
+VT_CONSTANT = "constant"
+VT_INITIAL = "initial"
+VT_RATE = "rate"
+VT_ASSIGNMENT = "assignment"
+VT_RELATIVE_RATE = "relativeRate"
+VT_RELATIVE_ASSIGNMENT = "relativeAssignment"
+#: List of all ``VT_*`` value type constants defined above
+#: (presumably the values allowed in the ``valueType`` column -- TODO
+#: confirm against the PEtab v2 specification)
+VALUE_TYPES = [
+    VT_CONSTANT,
+    VT_INITIAL,
+    VT_RATE,
+    VT_ASSIGNMENT,
+    VT_RELATIVE_RATE,
+    VT_RELATIVE_ASSIGNMENT,
+]
+
+#: Columns of the condition table: the condition ID plus the
+#: target / value type / target value columns defined above
+CONDITION_DF_COLS = [
+    CONDITION_ID,
+    TARGET_ID,
+    VALUE_TYPE,
+    TARGET_VALUE,
+]
+
+# EXPERIMENTS
+#: Mandatory columns of the experiment table
+EXPERIMENT_DF_REQUIRED_COLS = [
+    EXPERIMENT_ID,
+    TIME,
+    CONDITION_ID,
+]
 
 # OBSERVABLES
 
@@ -332,6 +376,8 @@
 MODEL_LANGUAGE = "language"
 #: Condition files key in the YAML file
 CONDITION_FILES = "condition_files"
+#: Experiment files key in the YAML file
+EXPERIMENT_FILES = "experiment_files"
 #: Measurement files key in the YAML file
 MEASUREMENT_FILES = "measurement_files"
 #: Observable files key in the YAML file

diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py
@@ -5,6 +5,10 @@
 from warnings import warn
 
 from ..v1 import *  # noqa: F403, F401, E402
+from .experiments import (  # noqa: F401
+    get_experiment_df,
+    write_experiment_df,
+)
 
 # import after v1
 from .problem import Problem  # noqa: F401

diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py
@@ -0,0 +1,40 @@
+"""Functions operating on the PEtab experiments table."""
+from pathlib import Path
+
+import pandas as pd
+
+__all__ = ["get_experiment_df", "write_experiment_df"]
+
+
+def get_experiment_df(
+    experiments_file: str | pd.DataFrame | Path | None,
+) -> pd.DataFrame | None:
+    """
+    Read the provided experiments file into a ``pandas.DataFrame``.
+
+    Arguments:
+        experiments_file:
+            Path of the tab-separated experiments file to read, or an
+            already loaded ``pandas.DataFrame``, or ``None``.
+
+    Returns:
+        The experiments table. A DataFrame argument and ``None`` are
+        returned unchanged (no copy is made).
+    """
+    if not isinstance(experiments_file, str | Path):
+        # Nothing to load: ``None`` or an in-memory table is passed through.
+        return experiments_file
+
+    # ``round_trip`` keeps float values exactly as written in the file.
+    return pd.read_csv(
+        experiments_file, sep="\t", float_precision="round_trip"
+    )
+
+
+def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Write a PEtab experiments table to a tab-separated file.
+
+    Arguments:
+        df: PEtab experiments table. It is first passed through
+            :func:`get_experiment_df`.
+        filename: Destination file name
+    """
+    experiment_table = get_experiment_df(df)
+    # The row index is not written to the file.
+    experiment_table.to_csv(filename, sep="\t", index=False)
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
@@ -10,18 +10,6 @@
 import numpy as np
 import pandas as pd
 
-from petab.v1 import (
-    assert_model_parameters_in_condition_or_parameter_table,
-)
-from petab.v1.C import (
-    ESTIMATE,
-    MODEL_ENTITY_ID,
-    NOISE_PARAMETERS,
-    NOMINAL_VALUE,
-    OBSERVABLE_PARAMETERS,
-    PARAMETER_DF_REQUIRED_COLS,
-    PARAMETER_ID,
-)
 from petab.v1.conditions import get_parametric_overrides
 from petab.v1.lint import (
     _check_df,
@@ -42,6 +30,10 @@
     get_valid_parameters_for_parameter_table,
 )
 from petab.v1.visualize.lint import validate_visualization_df
+from petab.v2 import (
+    assert_model_parameters_in_condition_or_parameter_table,
+)
+from petab.v2.C import *
 
 from ..v1 import (
     assert_measurement_conditions_present_in_condition_table,
@@ -61,10 +53,13 @@
     "ValidationTask",
     "CheckModel",
     "CheckTableExists",
+    "CheckValidPetabIdColumn",
     "CheckMeasurementTable",
     "CheckConditionTable",
     "CheckObservableTable",
     "CheckParameterTable",
+    "CheckExperimentTable",
+    "CheckExperimentConditionsExist",
     "CheckAllParametersPresentInParameterTable",
     "CheckValidParameterInConditionOrParameterTable",
     "CheckVisualizationTable",
@@ -214,6 +209,35 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             return ValidationError(f"{self.table_name} table is missing.")
 
 
+class CheckValidPetabIdColumn(ValidationTask):
+    """A task to check that a given column contains only valid PEtab IDs.
+
+    The table is looked up on the problem as the attribute
+    ``<table_name>_df``. A table that is ``None`` is silently accepted.
+    """
+
+    def __init__(
+        self, table_name: str, column_name: str, required_column: bool = True
+    ):
+        """
+        Arguments:
+            table_name: Prefix of the problem attribute holding the table,
+                e.g. ``"experiment"`` for ``problem.experiment_df``.
+            column_name: Name of the column whose values are checked.
+            required_column: Whether a missing column is reported as an
+                error (``True``) or ignored (``False``).
+        """
+        self.table_name = table_name
+        self.column_name = column_name
+        self.required_column = required_column
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        """Return a ``ValidationError`` if the column is invalid, else None."""
+        table = getattr(problem, f"{self.table_name}_df")
+        if table is None:
+            return None
+
+        if self.column_name in table.columns:
+            ids = table[self.column_name].values
+            try:
+                check_ids(ids, kind=self.column_name)
+            except ValueError as err:
+                return ValidationError(str(err))
+            return None
+
+        # Column is absent: only an error if the column is mandatory.
+        if self.required_column:
+            return ValidationError(
+                f"Column {self.column_name} is missing in "
+                f"{self.table_name} table."
+            )
+        return None
+
+
 class CheckMeasurementTable(ValidationTask):
     """A task to validate the measurement table of a PEtab problem."""
 
@@ -356,6 +380,66 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             return ValidationError(str(e))
 
 
+class CheckExperimentTable(ValidationTask):
+    """A task to validate the experiment table of a PEtab problem.
+
+    Checks that the required columns are present (via ``_check_df``) and
+    that every entry of the time column converts to a float that is either
+    finite or ``-inf``. A missing experiment table is accepted.
+    """
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        """Return a ``ValidationError`` for the first problem found, else
+        ``None``."""
+        df = problem.experiment_df
+        if df is None:
+            return None
+
+        try:
+            _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment")
+        except AssertionError as e:
+            return ValidationError(str(e))
+
+        # valid timepoints
+        invalid = []
+        for timepoint in df[TIME].values:
+            try:
+                timepoint = float(timepoint)
+            except (TypeError, ValueError):
+                # ``float()`` raises ValueError for unparsable strings and
+                # TypeError for non-numeric objects such as ``None``;
+                # both are invalid timepoints, not reasons to crash.
+                invalid.append(timepoint)
+                continue
+            # ``-inf`` is accepted; ``inf`` and ``nan`` are rejected.
+            if not np.isfinite(timepoint) and timepoint != -np.inf:
+                invalid.append(timepoint)
+        if invalid:
+            return ValidationError(
+                f"Invalid timepoints in experiment table: {invalid}"
+            )
+        return None
+
+
+class CheckExperimentConditionsExist(ValidationTask):
+    """A task to validate that all conditions in the experiment table exist
+    in the condition table.
+
+    Condition IDs are read from the ``conditionId`` column of the experiment
+    table and compared against the index of the condition table.
+    """
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        """Return a ``ValidationError`` if referenced conditions are missing,
+        else ``None``."""
+        experiment_df = problem.experiment_df
+        if experiment_df is None or experiment_df.empty:
+            # No experiments means no condition is referenced. Returning
+            # here also avoids dereferencing a missing condition table.
+            return None
+
+        if problem.condition_df is None:
+            return ValidationError(
+                "Experiment table is non-empty, "
+                "but condition table is missing."
+            )
+
+        required_conditions = set(experiment_df[CONDITION_ID].unique())
+        existing_conditions = set(problem.condition_df.index)
+
+        missing_conditions = required_conditions - existing_conditions
+        if missing_conditions:
+            return ValidationError(
+                f"Experiment table contains conditions that are not present "
+                f"in the condition table: {missing_conditions}"
+            )
+        return None
+
+
 class CheckAllParametersPresentInParameterTable(ValidationTask):
     """Ensure all required parameters are contained in the parameter table
     with no additional ones."""
@@ -558,6 +642,10 @@ def append_overrides(overrides):
     CheckModel(),
     CheckMeasurementTable(),
     CheckConditionTable(),
+    CheckExperimentTable(),
+    CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
+    CheckValidPetabIdColumn("experiment", CONDITION_ID),
+    CheckExperimentConditionsExist(),
     CheckObservableTable(),
     CheckObservablesDoNotShadowModelEntities(),
     CheckParameterTable(),