From 4be03c84dcb4a88972129582cee124f7a9b8ceb3 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 14:54:43 +0100 Subject: [PATCH] Add constants + I/O for new conditions/experiments tables (#334) Related to https://github.com/PEtab-dev/PEtab/issues/586 * constants for new yaml fields / table columns / ... * read/write experiment table * add experiments table to Problem, and populate from yaml * add first validation functions * include missing modules in API docs To be complemented by separate pull requests. --- doc/modules.rst | 3 + petab/schemas/petab_schema.v2.0.0.yaml | 5 +- petab/v2/C.py | 46 ++++++++++ petab/v2/__init__.py | 4 + petab/v2/experiments.py | 40 +++++++++ petab/v2/lint.py | 112 ++++++++++++++++++++++--- petab/v2/problem.py | 65 +++++++++++++- pytest.ini | 2 + tests/v2/test_experiments.py | 30 +++++++ tests/v2/test_lint.py | 32 +++++++ tests/v2/test_problem.py | 7 ++ 11 files changed, 330 insertions(+), 16 deletions(-) create mode 100644 petab/v2/experiments.py create mode 100644 tests/v2/test_experiments.py create mode 100644 tests/v2/test_lint.py diff --git a/doc/modules.rst b/doc/modules.rst index 8d6335c8..87a9559d 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -30,5 +30,8 @@ API Reference petab.v1.yaml petab.v2 petab.v2.C + petab.v2.experiments petab.v2.lint + petab.v2.models petab.v2.problem + petab.v2.petab1to2 diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index ddeb428a..b4d7c358 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -76,6 +76,10 @@ properties: description: List of PEtab condition files. $ref: "#/definitions/list_of_files" + experiment_files: + description: List of PEtab experiment files. + $ref: "#/definitions/list_of_files" + observable_files: description: List of PEtab observable files. 
$ref: "#/definitions/list_of_files" @@ -92,7 +96,6 @@ properties: - model_files - observable_files - measurement_files - - condition_files extensions: type: object diff --git a/petab/v2/C.py b/petab/v2/C.py index 11fede25..2d55355a 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -10,9 +10,14 @@ #: Observable ID column in the observable and measurement tables OBSERVABLE_ID = "observableId" +#: Experiment ID column in the measurement table +EXPERIMENT_ID = "experimentId" + +# TODO: remove #: Preequilibration condition ID column in the measurement table PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" +# TODO: remove #: Simulation condition ID column in the measurement table SIMULATION_CONDITION_ID = "simulationConditionId" @@ -40,6 +45,8 @@ #: Mandatory columns of measurement table MEASUREMENT_DF_REQUIRED_COLS = [ OBSERVABLE_ID, + # TODO: add + # EXPERIMENT_ID, SIMULATION_CONDITION_ID, MEASUREMENT, TIME, @@ -47,6 +54,7 @@ #: Optional columns of measurement table MEASUREMENT_DF_OPTIONAL_COLS = [ + # TODO: remove PREEQUILIBRATION_CONDITION_ID, OBSERVABLE_PARAMETERS, NOISE_PARAMETERS, @@ -125,9 +133,45 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" +# TODO: removed? 
#: Condition name column in the condition table CONDITION_NAME = "conditionName" +#: Column in the condition table with the ID of an entity that is changed +TARGET_ID = "targetId" +#: Column in the condition table with the type of value that is changed +VALUE_TYPE = "valueType" +#: Column in the condition table with the new value of the target entity +TARGET_VALUE = "targetValue" +# value types: +VT_CONSTANT = "constant" +VT_INITIAL = "initial" +VT_RATE = "rate" +VT_ASSIGNMENT = "assignment" +VT_RELATIVE_RATE = "relativeRate" +VT_RELATIVE_ASSIGNMENT = "relativeAssignment" +VALUE_TYPES = [ + VT_CONSTANT, + VT_INITIAL, + VT_RATE, + VT_ASSIGNMENT, + VT_RELATIVE_RATE, + VT_RELATIVE_ASSIGNMENT, +] + +CONDITION_DF_COLS = [ + CONDITION_ID, + TARGET_ID, + VALUE_TYPE, + TARGET_VALUE, +] + +# EXPERIMENTS +EXPERIMENT_DF_REQUIRED_COLS = [ + EXPERIMENT_ID, + TIME, + CONDITION_ID, +] # OBSERVABLES @@ -332,6 +376,8 @@ MODEL_LANGUAGE = "language" #: Condition files key in the YAML file CONDITION_FILES = "condition_files" +#: Experiment files key in the YAML file +EXPERIMENT_FILES = "experiment_files" #: Measurement files key in the YAML file MEASUREMENT_FILES = "measurement_files" #: Observable files key in the YAML file diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 98084fa5..ca55f7d0 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -5,6 +5,10 @@ from warnings import warn from ..v1 import * # noqa: F403, F401, E402 +from .experiments import ( # noqa: F401 + get_experiment_df, + write_experiment_df, +) # import after v1 from .problem import Problem # noqa: F401 diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py new file mode 100644 index 00000000..7833fa1f --- /dev/null +++ b/petab/v2/experiments.py @@ -0,0 +1,40 @@ +"""Functions operating on the PEtab experiments table.""" +from pathlib import Path + +import pandas as pd + +__all__ = ["get_experiment_df", "write_experiment_df"] + + +def get_experiment_df( + experiments_file: str | 
pd.DataFrame | Path | None, +) -> pd.DataFrame | None: + """ + Read the provided experiments file into a ``pandas.DataFrame``. + + Arguments: + experiments_file: Name of the file to read from or pandas.DataFrame. + + Returns: + Experiments DataFrame + """ + if experiments_file is None: + return experiments_file + + if isinstance(experiments_file, str | Path): + experiments_file = pd.read_csv( + experiments_file, sep="\t", float_precision="round_trip" + ) + + return experiments_file + + +def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab experiments table + + Arguments: + df: PEtab experiments table + filename: Destination file name + """ + df = get_experiment_df(df) + df.to_csv(filename, sep="\t", index=False) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 87554e64..fdf6de0c 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -10,18 +10,6 @@ import numpy as np import pandas as pd -from petab.v1 import ( - assert_model_parameters_in_condition_or_parameter_table, -) -from petab.v1.C import ( - ESTIMATE, - MODEL_ENTITY_ID, - NOISE_PARAMETERS, - NOMINAL_VALUE, - OBSERVABLE_PARAMETERS, - PARAMETER_DF_REQUIRED_COLS, - PARAMETER_ID, -) from petab.v1.conditions import get_parametric_overrides from petab.v1.lint import ( _check_df, @@ -42,6 +30,10 @@ get_valid_parameters_for_parameter_table, ) from petab.v1.visualize.lint import validate_visualization_df +from petab.v2 import ( + assert_model_parameters_in_condition_or_parameter_table, +) +from petab.v2.C import * from ..v1 import ( assert_measurement_conditions_present_in_condition_table, @@ -61,10 +53,13 @@ "ValidationTask", "CheckModel", "CheckTableExists", + "CheckValidPetabIdColumn", "CheckMeasurementTable", "CheckConditionTable", "CheckObservableTable", "CheckParameterTable", + "CheckExperimentTable", + "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", "CheckVisualizationTable", @@ -214,6 
+209,35 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError(f"{self.table_name} table is missing.") +class CheckValidPetabIdColumn(ValidationTask): + """A task to check that a given column contains only valid PEtab IDs.""" + + def __init__( + self, table_name: str, column_name: str, required_column: bool = True + ): + self.table_name = table_name + self.column_name = column_name + self.required_column = required_column + + def run(self, problem: Problem) -> ValidationIssue | None: + df = getattr(problem, f"{self.table_name}_df") + if df is None: + return + + if self.column_name not in df.columns: + if self.required_column: + return ValidationError( + f"Column {self.column_name} is missing in " + f"{self.table_name} table." + ) + return + + try: + check_ids(df[self.column_name].values, kind=self.column_name) + except ValueError as e: + return ValidationError(str(e)) + + class CheckMeasurementTable(ValidationTask): """A task to validate the measurement table of a PEtab problem.""" @@ -356,6 +380,66 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError(str(e)) +class CheckExperimentTable(ValidationTask): + """A task to validate the experiment table of a PEtab problem.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + if problem.experiment_df is None: + return + + df = problem.experiment_df + + try: + _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment") + except AssertionError as e: + return ValidationError(str(e)) + + # valid timepoints + invalid = [] + for time in df[TIME].values: + try: + time = float(time) + if not np.isfinite(time) and time != -np.inf: + invalid.append(time) + except ValueError: + invalid.append(time) + if invalid: + return ValidationError( + f"Invalid timepoints in experiment table: {invalid}" + ) + + +class CheckExperimentConditionsExist(ValidationTask): + """A task to validate that all conditions in the experiment table exist + in the condition table.""" + + 
def run(self, problem: Problem) -> ValidationIssue | None: + if problem.experiment_df is None: + return + + if ( + problem.condition_df is None + and problem.experiment_df is not None + and not problem.experiment_df.empty + ): + return ValidationError( + "Experiment table is non-empty, " + "but condition table is missing." + ) + + required_conditions = problem.experiment_df[CONDITION_ID].unique() + existing_conditions = problem.condition_df.index + + missing_conditions = set(required_conditions) - set( + existing_conditions + ) + if missing_conditions: + return ValidationError( + f"Experiment table contains conditions that are not present " + f"in the condition table: {missing_conditions}" + ) + + class CheckAllParametersPresentInParameterTable(ValidationTask): """Ensure all required parameters are contained in the parameter table with no additional ones.""" @@ -558,6 +642,10 @@ def append_overrides(overrides): CheckModel(), CheckMeasurementTable(), CheckConditionTable(), + CheckExperimentTable(), + CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), + CheckValidPetabIdColumn("experiment", CONDITION_ID), + CheckExperimentConditionsExist(), CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), CheckParameterTable(), diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 87a9b6e1..c22d74e1 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -4,6 +4,7 @@ import logging import os import tempfile +import warnings from collections.abc import Sequence from math import nan from numbers import Number @@ -23,9 +24,10 @@ sampling, yaml, ) -from ..v1.C import * # noqa: F403 from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix +from ..v2.C import * # noqa: F403 +from . 
import experiments if TYPE_CHECKING: from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask @@ -40,6 +42,7 @@ class Problem: - model - condition table + - experiment table - measurement table - parameter table - observables table @@ -49,6 +52,7 @@ class Problem: Parameters: condition_df: PEtab condition table + experiment_df: PEtab experiment table measurement_df: PEtab measurement table parameter_df: PEtab parameter table observable_df: PEtab observable table @@ -62,6 +66,7 @@ def __init__( self, model: Model = None, condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, measurement_df: pd.DataFrame = None, parameter_df: pd.DataFrame = None, visualization_df: pd.DataFrame = None, @@ -72,6 +77,7 @@ def __init__( from ..v2.lint import default_validation_tasks self.condition_df: pd.DataFrame | None = condition_df + self.experiment_df: pd.DataFrame | None = experiment_df self.measurement_df: pd.DataFrame | None = measurement_df self.parameter_df: pd.DataFrame | None = parameter_df self.visualization_df: pd.DataFrame | None = visualization_df @@ -83,8 +89,22 @@ def __init__( ValidationTask ] = default_validation_tasks.copy() + if self.experiment_df is not None: + warnings.warn( + "The experiment table is not yet supported and " + "will be ignored.", + stacklevel=2, + ) + def __str__(self): model = f"with model ({self.model})" if self.model else "without model" + + experiments = ( + f"{self.experiment_df.shape[0]} experiments" + if self.experiment_df is not None + else "without experiments table" + ) + conditions = ( f"{self.condition_df.shape[0]} conditions" if self.condition_df is not None @@ -114,8 +134,8 @@ def __str__(self): parameters = "without parameter_df table" return ( - f"PEtab Problem {model}, {conditions}, {observables}, " - f"{measurements}, {parameters}" + f"PEtab Problem {model}, {conditions}, {experiments}, " + f"{observables}, {measurements}, {parameters}" ) @staticmethod @@ -232,6 +252,16 @@ def 
get_path(filename): else None ) + experiment_files = [ + get_path(f) for f in problem0.get(EXPERIMENT_FILES, []) + ] + # If there are multiple tables, we will merge them + experiment_df = ( + core.concat_tables(experiment_files, experiments.get_experiment_df) + if experiment_files + else None + ) + visualization_files = [ get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) ] @@ -262,6 +292,7 @@ def get_path(filename): return Problem( condition_df=condition_df, + experiment_df=experiment_df, measurement_df=measurement_df, parameter_df=parameter_df, observable_df=observable_df, @@ -922,3 +953,31 @@ def add_mapping(self, petab_id: str, model_id: str): if self.mapping_df is not None else tmp_df ) + + def add_experiment(self, id_: str, *args): + """Add an experiment to the problem. + + :param id_: The experiment ID. + :param args: Timepoints and associated conditions: + ``time_1, condition_id_1, time_2, condition_id_2, ...``. + """ + if len(args) % 2 != 0: + raise ValueError( + "Arguments must be pairs of timepoints and condition IDs." 
+ ) + + records = [] + for i in range(0, len(args), 2): + records.append( + { + EXPERIMENT_ID: id_, + TIME: args[i], + CONDITION_ID: args[i + 1], + } + ) + tmp_df = pd.DataFrame(records) + self.experiment_df = ( + pd.concat([self.experiment_df, tmp_df]) + if self.experiment_df is not None + else tmp_df + ) diff --git a/pytest.ini b/pytest.ini index 11b8918a..1e9b4286 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,3 +7,5 @@ filterwarnings = ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning + # TODO: until we have proper v2 support + ignore:The experiment table is not yet supported and will be ignored:UserWarning diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py new file mode 100644 index 00000000..234552f2 --- /dev/null +++ b/tests/v2/test_experiments.py @@ -0,0 +1,30 @@ +"""Tests related to ``petab.v2.experiments``.""" +from tempfile import TemporaryDirectory + +import pandas as pd + +from petab.v2.C import CONDITION_ID, EXPERIMENT_ID, TIME +from petab.v2.experiments import get_experiment_df, write_experiment_df + + +def test_experiment_df_io(): + # Test None + assert get_experiment_df(None) is None + + # Test DataFrame + df = pd.DataFrame( + { + EXPERIMENT_ID: ["e1", "e2"], + CONDITION_ID: ["c1", "c2"], + TIME: [0, 1], + } + ) + df = get_experiment_df(df) + assert df.shape == (2, 3) + + # Test writing to file and round trip + with TemporaryDirectory() as tmpdir: + tmpfile = f"{tmpdir}/experiment.csv" + write_experiment_df(df, tmpfile) + df2 = get_experiment_df(tmpfile) + assert df.equals(df2) diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py new file mode 100644 index 00000000..db0c402a --- /dev/null +++ b/tests/v2/test_lint.py @@ -0,0 +1,32 @@ +"""Test related to ``petab.v2.lint``.""" + +from copy import deepcopy + +from petab.v2 import Problem +from petab.v2.C import * 
+from petab.v2.lint import * + + +def test_check_experiments(): + """Test ``CheckExperimentTable``.""" + problem = Problem() + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert problem.experiment_df.shape == (4, 3) + + check = CheckExperimentTable() + assert check.run(problem) is None + + assert check.run(Problem()) is None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.loc[0, TIME] = "invalid" + assert check.run(tmp_problem) is not None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.loc[0, TIME] = "inf" + assert check.run(tmp_problem) is not None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.drop(columns=[TIME], inplace=True) + assert check.run(tmp_problem) is not None diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 9d13e3df..41ecc238 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -60,6 +60,7 @@ def test_problem_from_yaml_multiple_files(): measurement_files: [measurements1.tsv, measurements2.tsv] observable_files: [observables1.tsv, observables2.tsv] model_files: + experiment_files: [experiments1.tsv, experiments2.tsv] """ with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") @@ -73,6 +74,11 @@ def test_problem_from_yaml_multiple_files(): problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) + problem.add_experiment(f"experiment{i}", 0, f"condition{i}") + petab.write_experiment_df( + problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") + ) + problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) petab.write_measurement_df( problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") @@ -93,6 +99,7 @@ def test_problem_from_yaml_multiple_files(): assert petab_problem.measurement_df.shape[0] == 2 assert petab_problem.observable_df.shape[0] == 2 assert petab_problem.condition_df.shape[0] == 2 + assert petab_problem.experiment_df.shape[0] == 2 def 
test_modify_problem():