From 7dd3e41c7305ef9ce933e14bce6c5ebe11de1102 Mon Sep 17 00:00:00 2001
From: Daniel Weindl <daniel.weindl@uni-bonn.de>
Date: Thu, 5 Dec 2024 15:27:05 +0100
Subject: [PATCH] Add constants + I/O for new conditions/experiments tables

* constants
* read/write experiment table
* add experiments table to Problem, and populate from yaml
* some first validation tasks

To be complemented by separate pull requests.
---
 doc/modules.rst                        |   3 +
 petab/schemas/petab_schema.v2.0.0.yaml |   5 +-
 petab/v2/C.py                          |  46 ++++++++++
 petab/v2/__init__.py                   |   4 +
 petab/v2/experiments.py                |  40 +++++++++
 petab/v2/lint.py                       | 112 ++++++++++++++++++++++---
 petab/v2/problem.py                    |  66 ++++++++++++++-
 pytest.ini                             |   2 +
 tests/v2/test_experiments.py           |  30 +++++++
 tests/v2/test_lint.py                  |  32 +++++++
 tests/v2/test_problem.py               |  14 ++++
 11 files changed, 338 insertions(+), 16 deletions(-)
 create mode 100644 petab/v2/experiments.py
 create mode 100644 tests/v2/test_experiments.py
 create mode 100644 tests/v2/test_lint.py

diff --git a/doc/modules.rst b/doc/modules.rst
index 8d6335c8..87a9559d 100644
--- a/doc/modules.rst
+++ b/doc/modules.rst
@@ -30,5 +30,8 @@ API Reference
    petab.v1.yaml
    petab.v2
    petab.v2.C
+   petab.v2.experiments
    petab.v2.lint
+   petab.v2.models
    petab.v2.problem
+   petab.v2.petab1to2
diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml
index ddeb428a..36446a82 100644
--- a/petab/schemas/petab_schema.v2.0.0.yaml
+++ b/petab/schemas/petab_schema.v2.0.0.yaml
@@ -76,6 +76,10 @@ properties:
           description: List of PEtab condition files.
           $ref: "#/definitions/list_of_files"
 
+        experiment_files:
+          description: List of PEtab experiment files.
+          $ref: "#/definitions/list_of_files"
+
         observable_files:
           description: List of PEtab observable files.
           $ref: "#/definitions/list_of_files"
@@ -92,7 +96,6 @@ properties:
         - model_files
         - observable_files
         - measurement_files
-        - condition_files
 
   extensions:
     type: object
diff --git a/petab/v2/C.py b/petab/v2/C.py
index 11fede25..2d55355a 100644
--- a/petab/v2/C.py
+++ b/petab/v2/C.py
@@ -10,9 +10,14 @@
 #: Observable ID column in the observable and measurement tables
 OBSERVABLE_ID = "observableId"
 
+#: Experiment ID column in the measurement table
+EXPERIMENT_ID = "experimentId"
+
+# TODO: remove
 #: Preequilibration condition ID column in the measurement table
 PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
 
+# TODO: remove
 #: Simulation condition ID column in the measurement table
 SIMULATION_CONDITION_ID = "simulationConditionId"
 
@@ -40,6 +45,8 @@
 #: Mandatory columns of measurement table
 MEASUREMENT_DF_REQUIRED_COLS = [
     OBSERVABLE_ID,
+    # TODO: add
+    # EXPERIMENT_ID,
     SIMULATION_CONDITION_ID,
     MEASUREMENT,
     TIME,
@@ -47,6 +54,7 @@
 
 #: Optional columns of measurement table
 MEASUREMENT_DF_OPTIONAL_COLS = [
+    # TODO: remove
     PREEQUILIBRATION_CONDITION_ID,
     OBSERVABLE_PARAMETERS,
     NOISE_PARAMETERS,
@@ -125,9 +133,45 @@
 
 #: Condition ID column in the condition table
 CONDITION_ID = "conditionId"
+# TODO: removed?
 #: Condition name column in the condition table
 CONDITION_NAME = "conditionName"
 
+#: Column in the condition table with the ID of an entity that is changed
+TARGET_ID = "targetId"
+#: Column in the condition table with the type of value that is changed
+VALUE_TYPE = "valueType"
+#: Column in the condition table with the new value of the target entity
+TARGET_VALUE = "targetValue"
+# value types:
+VT_CONSTANT = "constant"
+VT_INITIAL = "initial"
+VT_RATE = "rate"
+VT_ASSIGNMENT = "assignment"
+VT_RELATIVE_RATE = "relativeRate"
+VT_RELATIVE_ASSIGNMENT = "relativeAssignment"
+VALUE_TYPES = [
+    VT_CONSTANT,
+    VT_INITIAL,
+    VT_RATE,
+    VT_ASSIGNMENT,
+    VT_RELATIVE_RATE,
+    VT_RELATIVE_ASSIGNMENT,
+]
+
+CONDITION_DF_COLS = [
+    CONDITION_ID,
+    TARGET_ID,
+    VALUE_TYPE,
+    TARGET_VALUE,
+]
+
+# EXPERIMENTS
+EXPERIMENT_DF_REQUIRED_COLS = [
+    EXPERIMENT_ID,
+    TIME,
+    CONDITION_ID,
+]
 
 # OBSERVABLES
 
@@ -332,6 +376,8 @@
 MODEL_LANGUAGE = "language"
 #: Condition files key in the YAML file
 CONDITION_FILES = "condition_files"
+#: Experiment files key in the YAML file
+EXPERIMENT_FILES = "experiment_files"
 #: Measurement files key in the YAML file
 MEASUREMENT_FILES = "measurement_files"
 #: Observable files key in the YAML file
diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py
index 98084fa5..ca55f7d0 100644
--- a/petab/v2/__init__.py
+++ b/petab/v2/__init__.py
@@ -5,6 +5,10 @@
 from warnings import warn
 
 from ..v1 import *  # noqa: F403, F401, E402
+from .experiments import (  # noqa: F401
+    get_experiment_df,
+    write_experiment_df,
+)
 
 # import after v1
 from .problem import Problem  # noqa: F401
diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py
new file mode 100644
index 00000000..7833fa1f
--- /dev/null
+++ b/petab/v2/experiments.py
@@ -0,0 +1,40 @@
+"""Functions operating on the PEtab experiments table."""
+from pathlib import Path
+
+import pandas as pd
+
+__all__ = ["get_experiment_df", "write_experiment_df"]
+
+
+def get_experiment_df(
+    experiments_file: str | pd.DataFrame | Path | None,
+) -> pd.DataFrame | None:
+    """
+    Read the provided experiments file into a ``pandas.DataFrame``.
+
+    Arguments:
+        experiments_file: Name of the file to read from or pandas.DataFrame.
+
+    Returns:
+        Experiments DataFrame, or ``None`` if ``experiments_file`` is ``None``.
+    """
+    if experiments_file is None:
+        return experiments_file
+
+    if isinstance(experiments_file, str | Path):
+        experiments_file = pd.read_csv(
+            experiments_file, sep="\t", float_precision="round_trip"
+        )
+
+    return experiments_file
+
+
+def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None:
+    """Write PEtab experiments table
+
+    Arguments:
+        df: PEtab experiments table
+        filename: Destination file name
+    """
+    df = get_experiment_df(df)
+    df.to_csv(filename, sep="\t", index=False)
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
index 87554e64..fdf6de0c 100644
--- a/petab/v2/lint.py
+++ b/petab/v2/lint.py
@@ -10,18 +10,6 @@
 import numpy as np
 import pandas as pd
 
-from petab.v1 import (
-    assert_model_parameters_in_condition_or_parameter_table,
-)
-from petab.v1.C import (
-    ESTIMATE,
-    MODEL_ENTITY_ID,
-    NOISE_PARAMETERS,
-    NOMINAL_VALUE,
-    OBSERVABLE_PARAMETERS,
-    PARAMETER_DF_REQUIRED_COLS,
-    PARAMETER_ID,
-)
 from petab.v1.conditions import get_parametric_overrides
 from petab.v1.lint import (
     _check_df,
@@ -42,6 +30,10 @@
     get_valid_parameters_for_parameter_table,
 )
 from petab.v1.visualize.lint import validate_visualization_df
+from petab.v2 import (
+    assert_model_parameters_in_condition_or_parameter_table,
+)
+from petab.v2.C import *
 
 from ..v1 import (
     assert_measurement_conditions_present_in_condition_table,
@@ -61,10 +53,13 @@
     "ValidationTask",
     "CheckModel",
     "CheckTableExists",
+    "CheckValidPetabIdColumn",
     "CheckMeasurementTable",
     "CheckConditionTable",
     "CheckObservableTable",
     "CheckParameterTable",
+    "CheckExperimentTable",
+    "CheckExperimentConditionsExist",
     "CheckAllParametersPresentInParameterTable",
     "CheckValidParameterInConditionOrParameterTable",
     "CheckVisualizationTable",
@@ -214,6 +209,35 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             return ValidationError(f"{self.table_name} table is missing.")
 
 
+class CheckValidPetabIdColumn(ValidationTask):
+    """A task to check that a given column contains only valid PEtab IDs."""
+
+    def __init__(
+        self, table_name: str, column_name: str, required_column: bool = True
+    ):
+        self.table_name = table_name
+        self.column_name = column_name
+        self.required_column = required_column
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        df = getattr(problem, f"{self.table_name}_df")
+        if df is None:
+            return
+
+        if self.column_name not in df.columns:
+            if self.required_column:
+                return ValidationError(
+                    f"Column {self.column_name} is missing in "
+                    f"{self.table_name} table."
+                )
+            return
+
+        try:
+            check_ids(df[self.column_name].values, kind=self.column_name)
+        except ValueError as e:
+            return ValidationError(str(e))
+
+
 class CheckMeasurementTable(ValidationTask):
     """A task to validate the measurement table of a PEtab problem."""
 
@@ -356,6 +380,66 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             return ValidationError(str(e))
 
 
+class CheckExperimentTable(ValidationTask):
+    """A task to validate the experiment table of a PEtab problem."""
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        if problem.experiment_df is None:
+            return
+
+        df = problem.experiment_df
+
+        try:
+            _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment")
+        except AssertionError as e:
+            return ValidationError(str(e))
+
+        # timepoints must be finite numbers or -inf; NaN and +inf are invalid
+        invalid = []
+        for time in df[TIME].values:
+            try:
+                time = float(time)
+                if not np.isfinite(time) and time != -np.inf:
+                    invalid.append(time)
+            except (TypeError, ValueError):  # float(None) raises TypeError
+                invalid.append(time)
+        if invalid:
+            return ValidationError(
+                f"Invalid timepoints in experiment table: {invalid}"
+            )
+
+
+class CheckExperimentConditionsExist(ValidationTask):
+    """A task to validate that all conditions in the experiment table exist
+    in the condition table."""
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        if problem.experiment_df is None:
+            return
+
+        if problem.condition_df is None:
+            # An empty experiment table references no conditions, so there
+            # is nothing to look up (and no condition table to index).
+            if problem.experiment_df.empty:
+                return
+            return ValidationError(
+                "Experiment table is non-empty, "
+                "but condition table is missing."
+            )
+
+        required_conditions = problem.experiment_df[CONDITION_ID].unique()
+        existing_conditions = problem.condition_df.index
+
+        missing_conditions = set(required_conditions) - set(
+            existing_conditions
+        )
+        if missing_conditions:
+            return ValidationError(
+                f"Experiment table contains conditions that are not present "
+                f"in the condition table: {missing_conditions}"
+            )
+
+
 class CheckAllParametersPresentInParameterTable(ValidationTask):
     """Ensure all required parameters are contained in the parameter table
     with no additional ones."""
@@ -558,6 +642,10 @@ def append_overrides(overrides):
     CheckModel(),
     CheckMeasurementTable(),
     CheckConditionTable(),
+    CheckExperimentTable(),
+    CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
+    CheckValidPetabIdColumn("experiment", CONDITION_ID),
+    CheckExperimentConditionsExist(),
     CheckObservableTable(),
     CheckObservablesDoNotShadowModelEntities(),
     CheckParameterTable(),
diff --git a/petab/v2/problem.py b/petab/v2/problem.py
index 4c36d791..572db825 100644
--- a/petab/v2/problem.py
+++ b/petab/v2/problem.py
@@ -4,6 +4,7 @@
 import logging
 import os
 import tempfile
+import warnings
 from math import nan
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -21,9 +22,10 @@
     sampling,
     yaml,
 )
-from ..v1.C import *  # noqa: F403
 from ..v1.models.model import Model, model_factory
 from ..v1.yaml import get_path_prefix
+from ..v2.C import *  # noqa: F403
+from . import experiments
 
 if TYPE_CHECKING:
     from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask
@@ -38,6 +40,7 @@ class Problem:
 
     - model
     - condition table
+    - experiment table
     - measurement table
     - parameter table
     - observables table
@@ -47,6 +50,7 @@ class Problem:
 
     Parameters:
         condition_df: PEtab condition table
+        experiment_df: PEtab experiment table
         measurement_df: PEtab measurement table
         parameter_df: PEtab parameter table
         observable_df: PEtab observable table
@@ -60,6 +64,7 @@ def __init__(
         self,
         model: Model = None,
         condition_df: pd.DataFrame = None,
+        experiment_df: pd.DataFrame = None,
         measurement_df: pd.DataFrame = None,
         parameter_df: pd.DataFrame = None,
         visualization_df: pd.DataFrame = None,
@@ -70,6 +75,7 @@ def __init__(
         from ..v2.lint import default_validation_tasks
 
         self.condition_df: pd.DataFrame | None = condition_df
+        self.experiment_df: pd.DataFrame | None = experiment_df
         self.measurement_df: pd.DataFrame | None = measurement_df
         self.parameter_df: pd.DataFrame | None = parameter_df
         self.visualization_df: pd.DataFrame | None = visualization_df
@@ -81,8 +87,22 @@ def __init__(
             ValidationTask
         ] = default_validation_tasks.copy()
 
+        if self.experiment_df is not None:
+            warnings.warn(
+                "The experiment table is not yet supported and "
+                "will be ignored.",
+                stacklevel=2,
+            )
+
     def __str__(self):
         model = f"with model ({self.model})" if self.model else "without model"
+
+        experiments = (
+            f"{self.experiment_df.shape[0]} experiments"
+            if self.experiment_df is not None
+            else "without experiments table"
+        )
+
         conditions = (
             f"{self.condition_df.shape[0]} conditions"
             if self.condition_df is not None
@@ -112,8 +132,8 @@ def __str__(self):
             parameters = "without parameter_df table"
 
         return (
-            f"PEtab Problem {model}, {conditions}, {observables}, "
-            f"{measurements}, {parameters}"
+            f"PEtab Problem {model}, {conditions}, {experiments}, "
+            f"{observables}, {measurements}, {parameters}"
         )
 
     @staticmethod
@@ -230,6 +250,16 @@ def get_path(filename):
             else None
         )
 
+        experiment_files = [
+            get_path(f) for f in problem0.get(EXPERIMENT_FILES, [])
+        ]
+        # If there are multiple tables, we will merge them
+        experiment_df = (
+            core.concat_tables(experiment_files, experiments.get_experiment_df)
+            if experiment_files
+            else None
+        )
+
         visualization_files = [
             get_path(f) for f in problem0.get(VISUALIZATION_FILES, [])
         ]
@@ -260,6 +290,7 @@ def get_path(filename):
 
         return Problem(
             condition_df=condition_df,
+            experiment_df=experiment_df,
             measurement_df=measurement_df,
             parameter_df=parameter_df,
             observable_df=observable_df,
@@ -724,3 +755,32 @@ def validate(
                     break
 
         return validation_results
+
+    def add_experiment(self, id_: str, *args):
+        """Add an experiment to the problem.
+
+        :param id_: The experiment ID.
+        :param args: Timepoints and associated conditions:
+            ``time_1, condition_id_1, time_2, condition_id_2, ...``.
+        """
+        if len(args) % 2 != 0:
+            raise ValueError(
+                "Arguments must be pairs of timepoints and condition IDs."
+            )
+
+        records = []
+        for i in range(0, len(args), 2):
+            records.append(
+                {
+                    EXPERIMENT_ID: id_,
+                    TIME: args[i],
+                    CONDITION_ID: args[i + 1],
+                }
+            )
+        tmp_df = pd.DataFrame(records)
+        if self.experiment_df is None:
+            self.experiment_df = tmp_df
+        else:
+            self.experiment_df = pd.concat(
+                [self.experiment_df, tmp_df], ignore_index=True
+            )
diff --git a/pytest.ini b/pytest.ini
index 11b8918a..1e9b4286 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -7,3 +7,5 @@ filterwarnings =
     ignore:Support for PEtab2.0 is experimental:UserWarning
     ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning
     ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning
+    # TODO: until we have proper v2 support
+    ignore:The experiment table is not yet supported and will be ignored:UserWarning
diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py
new file mode 100644
index 00000000..234552f2
--- /dev/null
+++ b/tests/v2/test_experiments.py
@@ -0,0 +1,30 @@
+"""Tests related to ``petab.v2.experiments``."""
+from tempfile import TemporaryDirectory
+
+import pandas as pd
+
+from petab.v2.C import CONDITION_ID, EXPERIMENT_ID, TIME
+from petab.v2.experiments import get_experiment_df, write_experiment_df
+
+
+def test_experiment_df_io():
+    # Test None
+    assert get_experiment_df(None) is None
+
+    # Test DataFrame
+    df = pd.DataFrame(
+        {
+            EXPERIMENT_ID: ["e1", "e2"],
+            CONDITION_ID: ["c1", "c2"],
+            TIME: [0, 1],
+        }
+    )
+    df = get_experiment_df(df)
+    assert df.shape == (2, 3)
+
+    # Test writing to file and round trip
+    with TemporaryDirectory() as tmpdir:
+        tmpfile = f"{tmpdir}/experiment.csv"
+        write_experiment_df(df, tmpfile)
+        df2 = get_experiment_df(tmpfile)
+        assert df.equals(df2)
diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py
new file mode 100644
index 00000000..db0c402a
--- /dev/null
+++ b/tests/v2/test_lint.py
@@ -0,0 +1,32 @@
+"""Tests related to ``petab.v2.lint``."""
+
+from copy import deepcopy
+
+from petab.v2 import Problem
+from petab.v2.C import *
+from petab.v2.lint import *
+
+
+def test_check_experiments():
+    """Test ``CheckExperimentTable``."""
+    problem = Problem()
+    problem.add_experiment("e1", 0, "c1", 1, "c2")
+    problem.add_experiment("e2", "-inf", "c1", 1, "c2")
+    assert problem.experiment_df.shape == (4, 3)
+
+    check = CheckExperimentTable()
+    assert check.run(problem) is None
+
+    assert check.run(Problem()) is None
+
+    tmp_problem = deepcopy(problem)
+    tmp_problem.experiment_df.loc[0, TIME] = "invalid"
+    assert check.run(tmp_problem) is not None
+
+    tmp_problem = deepcopy(problem)
+    tmp_problem.experiment_df.loc[0, TIME] = "inf"
+    assert check.run(tmp_problem) is not None
+
+    tmp_problem = deepcopy(problem)
+    tmp_problem.experiment_df.drop(columns=[TIME], inplace=True)
+    assert check.run(tmp_problem) is not None
diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py
index 418f7818..244160e5 100644
--- a/tests/v2/test_problem.py
+++ b/tests/v2/test_problem.py
@@ -7,6 +7,7 @@
 from petab.v2 import Problem
 from petab.v2.C import (
     CONDITION_ID,
+    EXPERIMENT_ID,
     MEASUREMENT,
     NOISE_FORMULA,
     OBSERVABLE_FORMULA,
@@ -54,6 +55,7 @@ def test_problem_from_yaml_multiple_files():
       measurement_files: [measurements1.tsv, measurements2.tsv]
       observable_files: [observables1.tsv, observables2.tsv]
       model_files:
+      experiment_files: [experiments1.tsv, experiments2.tsv]
     """
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -72,6 +74,17 @@ def test_problem_from_yaml_multiple_files():
                 condition_df, Path(tmpdir, f"conditions{i}.tsv")
             )
 
+            experiment_df = pd.DataFrame(
+                {
+                    EXPERIMENT_ID: [f"experiment{i}"],
+                    TIME: [0],
+                    CONDITION_ID: [f"condition{i}"],
+                }
+            ).set_index([EXPERIMENT_ID])
+            petab.write_experiment_df(
+                experiment_df, Path(tmpdir, f"experiments{i}.tsv")
+            )
+
             measurement_df = pd.DataFrame(
                 {
                     SIMULATION_CONDITION_ID: [f"condition{i}"],
@@ -105,3 +118,4 @@ def test_problem_from_yaml_multiple_files():
         assert petab_problem.measurement_df.shape[0] == 2
         assert petab_problem.observable_df.shape[0] == 2
         assert petab_problem.condition_df.shape[0] == 2
+        assert petab_problem.experiment_df.shape[0] == 2