From 5278956907764bdfde82f0695efa64ff0b368931 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Thu, 14 Sep 2023 13:58:57 +0100 Subject: [PATCH 01/17] Initial constraint OOP design --- entmoot/constraints.py | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 entmoot/constraints.py diff --git a/entmoot/constraints.py b/entmoot/constraints.py new file mode 100644 index 0000000..b83dcfe --- /dev/null +++ b/entmoot/constraints.py @@ -0,0 +1,45 @@ +from typing import TYPE_CHECKING +from abc import ABC, abstractmethod + +import pyomo.environ as pyo + +if TYPE_CHECKING: + from problem_config import FeatureType + +class Constraint(ABC): + def __init__(self, features: list[str]): + self.features = features + + def _get_feature_idxs(self, feat_list: list["FeatureType"]): + """Get the index of each of the features in the constraint expression""" + all_keys = [feat.name for feat in feat_list] + feat_idxs = [all_keys.index(key) for key in self.features] + return feat_idxs + + def as_pyomo_constraint(self): + return pyo.Constraint() + +class ConstraintType(ABC): + """Contains the type of constraint - whether it is an expression, or a function""" + @abstractmethod + def as_pyomo_constraint(self): + pass + + +class ExpressionConstraint: + def as_pyomo_constraint(self): + return pyo.Constraint(rule=self._get_expr()) + + @abstractmethod + def _get_expr(self): + pass + +class FunctionalConstraint: + def as_pyomo_constraint(self): + return pyo.Constraint(rule=self._get_function()) + + @abstractmethod + def _get_expr(self): + pass + + From 0f6079e9486892cedbf445d3526bb9400699df92 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Thu, 14 Sep 2023 14:32:46 +0100 Subject: [PATCH 02/17] Implement linear constraints --- entmoot/constraints.py | 78 ++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/entmoot/constraints.py b/entmoot/constraints.py index b83dcfe..2c3b59d 100644 --- 
a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable from abc import ABC, abstractmethod import pyomo.environ as pyo @@ -6,40 +6,68 @@ if TYPE_CHECKING: from problem_config import FeatureType + class Constraint(ABC): - def __init__(self, features: list[str]): - self.features = features + def __init__(self, features_keys: list[str]): + self.feature_keys = features_keys - def _get_feature_idxs(self, feat_list: list["FeatureType"]): - """Get the index of each of the features in the constraint expression""" + def _get_feature_vars( + self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] + ) -> list[pyo.Var]: + """Return a list of all the pyo.Vars, in the order of the constraint definition""" all_keys = [feat.name for feat in feat_list] - feat_idxs = [all_keys.index(key) for key in self.features] - return feat_idxs - - def as_pyomo_constraint(self): - return pyo.Constraint() - -class ConstraintType(ABC): - """Contains the type of constraint - whether it is an expression, or a function""" + feat_idxs = [all_keys.index(key) for key in self.feature_keys] + features = [model._all_feat[i] for i in feat_idxs] + return features + + def as_pyomo_constraint( + self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] + ): + features = self._get_feature_vars(model, feat_list) + return self._as_pyomo_constraint(features) + @abstractmethod - def as_pyomo_constraint(self): + def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: pass -class ExpressionConstraint: - def as_pyomo_constraint(self): - return pyo.Constraint(rule=self._get_expr()) - +class ExpressionConstraint(Constraint): + def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: + return pyo.Constraint(expr=self._get_expr(features)) + @abstractmethod - def _get_expr(self): + def _get_expr(self, features) -> pyo.Expression: pass - -class FunctionalConstraint: - def 
as_pyomo_constraint(self): - return pyo.Constraint(rule=self._get_function()) - + + +class FunctionalConstraint(Constraint): + def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: + return pyo.Constraint(rule=self._get_function(features)) + @abstractmethod - def _get_expr(self): + def _get_function(self, features) -> Callable[..., pyo.Expression]: pass +class LinearConstraint(ExpressionConstraint): + """Constraint that is a function of X @ C, where X is the feature list, and C + is the list of coefficients.""" + + def __init__(self, feature_keys: list[str], coefficients: list[float], rhs: float): + self.coefficients = coefficients + self.rhs = rhs + super().__init__(feature_keys) + + def _get_lhs(self, features: pyo.ConcreteModel) -> pyo.Expression: + """Get the left-hand side of the linear constraint""" + return sum(f * c for f, c in zip(features, self.coefficients)) + + +class LinearEqualityConstraint(LinearConstraint): + def _get_expr(self, features): + return self._get_lhs(features) == self.rhs + + +class LinearInequalityConstraint(LinearConstraint): + def _get_expr(self, features): + return self._get_lhs(features) <= self.rhs From 8c297a2adb83555da4cc1f295e9362387658c897 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Sat, 16 Sep 2023 16:10:19 +0100 Subject: [PATCH 03/17] Create NChooseK constraint --- entmoot/constraints.py | 51 ++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/entmoot/constraints.py b/entmoot/constraints.py index 2c3b59d..28b6712 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from problem_config import FeatureType +ConstraintFunctionType = Callable[[pyo.ConcreteModel, int], pyo.Expression] class Constraint(ABC): def __init__(self, features_keys: list[str]): @@ -20,19 +21,14 @@ def _get_feature_vars( features = [model._all_feat[i] for i in feat_idxs] return features - def as_pyomo_constraint( - self, model: 
pyo.ConcreteModel, feat_list: list["FeatureType"] - ): - features = self._get_feature_vars(model, feat_list) - return self._as_pyomo_constraint(features) - - @abstractmethod - def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: - pass - class ExpressionConstraint(Constraint): - def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: + """Constraints defined by pyomo.Expressions. + + For constraints that can be simply defined by an expression of variables. + """ + def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + features = self._get_feature_vars(model, feat_list) return pyo.Constraint(expr=self._get_expr(features)) @abstractmethod @@ -41,11 +37,15 @@ def _get_expr(self, features) -> pyo.Expression: class FunctionalConstraint(Constraint): - def _as_pyomo_constraint(self, features: list[pyo.Var]) -> pyo.Constraint: - return pyo.Constraint(rule=self._get_function(features)) + """A constraint that uses a functional approach. 
+ + For constraints that require creating intermediate variables and access to the model.""" + def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + features = self._get_feature_vars(model, feat_list) + return pyo.Constraint(rule=self._get_function(model, features)) @abstractmethod - def _get_function(self, features) -> Callable[..., pyo.Expression]: + def _get_function(self, features) -> ConstraintFunctionType: pass @@ -71,3 +71,26 @@ def _get_expr(self, features): class LinearInequalityConstraint(LinearConstraint): def _get_expr(self, features): return self._get_lhs(features) <= self.rhs + + +class NChooseKConstraint(FunctionalConstraint): + tol: float = 1e-6 + M: float = 1e6 + def __init__(self, feature_keys: list[str], min_count: int, max_count: int, none_also_valid: bool = False): + self.min_count = min_count + self.max_count = max_count + self.none_also_valid = none_also_valid + super().__init__(feature_keys) + + def _get_function(self, model, features): + model.feat_selected = pyo.Var(range(len(features)), domain=pyo.Binary, initialize=0) + model.ub_selected = pyo.ConstraintList() + model.lb_selected = pyo.ConstraintList() + for i in range(len(features)): + model.ub_selected.add(expr=model.feat_selected[i]*self.M >= features[i]) + model.lb_selected.add(expr=model.feat_selected[i]*self.tol <= features[i]) + + def inner(model, i): + return sum(model.feat_selected.values()) <= self.max_count + + return inner \ No newline at end of file From 3f839c5b2980ab63beed5d082b9b00893ed9cbc6 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Sat, 16 Sep 2023 16:42:22 +0100 Subject: [PATCH 04/17] Provide example of Constraint usage --- docs/notebooks/constraint_classes.ipynb | 220 ++++++++++++++++++++++++ entmoot/benchmarks.py | 21 +++ entmoot/constraints.py | 4 +- 3 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 docs/notebooks/constraint_classes.ipynb diff --git 
a/docs/notebooks/constraint_classes.ipynb b/docs/notebooks/constraint_classes.ipynb new file mode 100644 index 0000000..8e50330 --- /dev/null +++ b/docs/notebooks/constraint_classes.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Constraint Classes\n", + "\n", + "To make applying constraints to your model easier, some constraints have been \n", + "provided as a part of ENTMOOT." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from entmoot.problem_config import ProblemConfig\n", + "from entmoot.models.enting import Enting\n", + "from entmoot.optimizers.pyomo_opt import PyomoOptimizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### NChooseKConstraint\n", + "\n", + "This constraint is often used in the design of experiments. This applies a bound on the \n", + "number of non-zero variables." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\tobyb\\phd\\entmoot\\entmoot\\models\\mean_models\\tree_ensemble.py:23: UserWarning: No 'train_params' for tree ensemble training specified. 
Switch training to default params!\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from entmoot.benchmarks import build_reals_only_problem, eval_reals_only_testfunc\n", + "\n", + "# standard setting up of problem\n", + "problem_config = ProblemConfig(rnd_seed=73)\n", + "build_reals_only_problem(problem_config)\n", + "rnd_sample = problem_config.get_rnd_sample_list(num_samples=50)\n", + "testfunc_evals = eval_reals_only_testfunc(rnd_sample)\n", + "\n", + "params = {\"unc_params\": {\"dist_metric\": \"l1\", \"acq_sense\": \"penalty\"}}\n", + "enting = Enting(problem_config, params=params)\n", + "# fit tree ensemble\n", + "enting.fit(rnd_sample, testfunc_evals)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set parameter Username\n", + "Academic license - for non-commercial use only - expires 2024-09-06\n", + "Read LP format model from file C:\\Users\\tobyb\\AppData\\Local\\Temp\\tmpzrofd3mo.pyomo.lp\n", + "Reading time = 0.02 seconds\n", + "x1: 2774 rows, 1913 columns, 9130 nonzeros\n", + "Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (win64)\n", + "\n", + "CPU model: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz, instruction set [SSE2|AVX|AVX2|AVX512]\n", + "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n", + "\n", + "Optimize a model with 2774 rows, 1913 columns and 9130 nonzeros\n", + "Model fingerprint: 0x31e842ff\n", + "Variable types: 1292 continuous, 621 integer (621 binary)\n", + "Coefficient statistics:\n", + " Matrix range [1e-06, 1e+06]\n", + " Objective range [1e+00, 2e+00]\n", + " Bounds range [1e+00, 5e+00]\n", + " RHS range [1e-04, 5e+00]\n", + "Presolve removed 273 rows and 260 columns\n", + "Presolve time: 0.05s\n", + "Presolved: 2501 rows, 1653 columns, 8080 nonzeros\n", + "Variable types: 1282 continuous, 371 integer (371 binary)\n", + "Found heuristic solution: objective 24.8382603\n", + 
"Found heuristic solution: objective 19.1248452\n", + "\n", + "Root relaxation: objective 2.576224e+00, 577 iterations, 0.01 seconds (0.01 work units)\n", + "\n", + " Nodes | Current Node | Objective Bounds | Work\n", + " Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time\n", + "\n", + " 0 0 2.57622 0 18 19.12485 2.57622 86.5% - 0s\n", + "H 0 0 3.6397521 2.57622 29.2% - 0s\n", + "H 0 0 3.3869028 3.24087 4.31% - 0s\n", + " 0 0 3.38690 0 6 3.38690 3.38690 0.00% - 0s\n", + "\n", + "Cutting planes:\n", + " Gomory: 4\n", + " Cover: 83\n", + " Implied bound: 402\n", + " Clique: 185\n", + " MIR: 4\n", + " Flow cover: 13\n", + " Network: 8\n", + " RLT: 30\n", + " Relax-and-lift: 98\n", + " PSD: 4\n", + "\n", + "Explored 1 nodes (877 simplex iterations) in 0.14 seconds (0.13 work units)\n", + "Thread count was 8 (of 8 available processors)\n", + "\n", + "Solution count 4: 3.3869 3.63975 19.1248 24.8383 \n", + "\n", + "Optimal solution found (tolerance 1.00e-04)\n", + "Best objective 3.386902819809e+00, best bound 3.386902819809e+00, gap 0.0000%\n" + ] + } + ], + "source": [ + "from entmoot.constraints import NChooseKConstraint\n", + "model_pyo = problem_config.get_pyomo_model_core()\n", + "\n", + "# define the constraint\n", + "# then immediately apply it to the model\n", + "model_pyo.nchoosek = NChooseKConstraint(\n", + " feature_keys=[\"x1\", \"x2\", \"x3\", \"x4\", \"x5\"], \n", + " min_count=1,\n", + " max_count=3,\n", + " none_also_valid=True\n", + ").as_pyomo_constraint(model_pyo, problem_config.feat_list)\n", + "\n", + "\n", + "# optimise the model\n", + "params_pyomo = {\"solver_name\": \"gurobi\"}\n", + "opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo)\n", + "res_pyo = opt_pyo.solve(enting, model_core=model_pyo)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.0, 0.0, 4.088297585401641, 4.888952150927435, 
4.944564863420855]\n" + ] + } + ], + "source": [ + "print(res_pyo.opt_point)\n", + "assert 1 <= sum(x > 1e-6 for x in res_pyo.opt_point) <= 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Defining your own constraint\n", + "\n", + "We have provided some constraints already as a part of ENTMOOT. If these do not \n", + "fit your needs, then you can define your own!\n", + "\n", + "The easiest approach is to subclass ExpressionConstraint, and define some custom expression\n", + "that is a function of the variables. From that, you should be able to use the constraint \n", + "as shown above. This needs to return a pyomo.Expression object. If you need to do \n", + "a more involved procedure that modifies the model, you can use a FunctionalConstraint \n", + "instead (see NChooseKConstraint)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from entmoot.constraints import ExpressionConstraint\n", + "\n", + "class SumLessThanTen(ExpressionConstraint):\n", + " \"\"\"A constraint that enforces all features to be equal.\"\"\"\n", + " def _get_expr(self, features):\n", + " return sum(features) <= 10" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "enttest", + "language": "python", + "name": "enttest" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/entmoot/benchmarks.py b/entmoot/benchmarks.py index d83e148..bfd917c 100644 --- a/entmoot/benchmarks.py +++ b/entmoot/benchmarks.py @@ -149,3 +149,24 @@ def compute_objectives(xi: Iterable, no_cat=False): f"You provided the illegal value {n_obj} for the number of objectives. 
" f"Allowed values are 1 and 2" ) + +def build_reals_only_problem(problem_config: ProblemConfig): + """A problem containing only real values, as used to demonstrate the NChooseK + constraint. + + The minimum is (1.0, 2.0, 3.0, ...)""" + + problem_config.add_feature("real", (0.0, 5.0), name="x1") + problem_config.add_feature("real", (0.0, 5.0), name="x2") + problem_config.add_feature("real", (0.0, 5.0), name="x3") + problem_config.add_feature("real", (0.0, 5.0), name="x4") + problem_config.add_feature("real", (0.0, 5.0), name="x5") + problem_config.add_min_objective() + +def eval_reals_only_testfunc(X: ArrayLike): + """The function (x1 - 1)**2 + (x2 - 2)**2 + ...""" + x = np.array(X) + xbar = np.ones_like(x) + xbar *= (np.arange(x.shape[1]) + 1)[None, :] + y = np.sum((x - xbar)**2, axis=1) + return y.reshape(-1, 1) \ No newline at end of file diff --git a/entmoot/constraints.py b/entmoot/constraints.py index 28b6712..9bbc15b 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -9,8 +9,8 @@ ConstraintFunctionType = Callable[[pyo.ConcreteModel, int], pyo.Expression] class Constraint(ABC): - def __init__(self, features_keys: list[str]): - self.feature_keys = features_keys + def __init__(self, feature_keys: list[str]): + self.feature_keys = feature_keys def _get_feature_vars( self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] From c40de4f8346401df701fec0e076b42f7a8201397 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 11:46:03 +0100 Subject: [PATCH 05/17] Add tests and docstrings --- entmoot/constraints.py | 22 ++++++++- tests/test_constraints_pyomo.py | 86 +++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 tests/test_constraints_pyomo.py diff --git a/entmoot/constraints.py b/entmoot/constraints.py index 9bbc15b..a50ae70 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -9,6 +9,12 @@ ConstraintFunctionType = Callable[[pyo.ConcreteModel, int], pyo.Expression] 
class Constraint(ABC): + """A constraint to be applied to a model. + + Implements a user-friendly way to construct constraints to an optimisation problem. + + Attributes: + feature_keys: A list of the string names of the features to be constrained""" def __init__(self, feature_keys: list[str]): self.feature_keys = feature_keys @@ -20,12 +26,18 @@ def _get_feature_vars( feat_idxs = [all_keys.index(key) for key in self.feature_keys] features = [model._all_feat[i] for i in feat_idxs] return features - + + @abstractmethod + def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + """Convert to a pyomo.Constraint object. + + This requires the model (to access the variables), and the feat_list (to access the feature names)""" + pass class ExpressionConstraint(Constraint): """Constraints defined by pyomo.Expressions. - For constraints that can be simply defined by an expression of variables. + For constraints that can be simply defined by an expression of variables. 
""" def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: features = self._get_feature_vars(model, feat_list) @@ -74,6 +86,7 @@ def _get_expr(self, features): class NChooseKConstraint(FunctionalConstraint): + """Constrain the number of active features to be bounded by min_count and max_count.""" tol: float = 1e-6 M: float = 1e6 def __init__(self, feature_keys: list[str], min_count: int, max_count: int, none_also_valid: bool = False): @@ -83,9 +96,14 @@ def __init__(self, feature_keys: list[str], min_count: int, max_count: int, none super().__init__(feature_keys) def _get_function(self, model, features): + # constrain the features using the binary variable y + # where y indicates whether the feature is selected + # y * tol <= x <= y * M + # tol is sufficiently small, M is sufficiently large model.feat_selected = pyo.Var(range(len(features)), domain=pyo.Binary, initialize=0) model.ub_selected = pyo.ConstraintList() model.lb_selected = pyo.ConstraintList() + for i in range(len(features)): model.ub_selected.add(expr=model.feat_selected[i]*self.M >= features[i]) model.lb_selected.add(expr=model.feat_selected[i]*self.tol <= features[i]) diff --git a/tests/test_constraints_pyomo.py b/tests/test_constraints_pyomo.py new file mode 100644 index 0000000..82cf953 --- /dev/null +++ b/tests/test_constraints_pyomo.py @@ -0,0 +1,86 @@ +from entmoot.problem_config import ProblemConfig +from entmoot.models.enting import Enting +from entmoot.optimizers.pyomo_opt import PyomoOptimizer + +from entmoot.benchmarks import build_reals_only_problem, eval_reals_only_testfunc, build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc +from entmoot.constraints import LinearEqualityConstraint, LinearInequalityConstraint, NChooseKConstraint +import pytest +import numpy as np + +def test_linear_equality_constraint(): + problem_config = ProblemConfig(rnd_seed=73) + # number of objectives + number_objectives = 2 + 
build_multi_obj_categorical_problem(problem_config, n_obj=number_objectives) + + # sample data + rnd_sample = problem_config.get_rnd_sample_list(num_samples=20) + testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=number_objectives) + + params = {"unc_params": {"dist_metric": "l1", "acq_sense": "exploration"}} + enting = Enting(problem_config, params=params) + # fit tree ensemble + enting.fit(rnd_sample, testfunc_evals) + + model_pyo = problem_config.get_pyomo_model_core() + # define the constraint + # then immediately apply it to the model + model_pyo.xy_equal = LinearEqualityConstraint( + feature_keys=["feat_3", "feat_4"], + coefficients=[1, -1], + rhs=0 + ).as_pyomo_constraint(model_pyo, problem_config.feat_list) + + model_pyo.yz_equal = LinearEqualityConstraint( + feature_keys=["feat_4", "feat_5"], + coefficients=[1, -1], + rhs=0 + ).as_pyomo_constraint(model_pyo, problem_config.feat_list) + + + # optimise the model + params_pyomo = {"solver_name": "gurobi"} + opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo) + res_pyo = opt_pyo.solve(enting, model_core=model_pyo) + x_opt, y_opt, z_opt = res_pyo.opt_point[3:] + + assert round(x_opt, 5) == round(y_opt, 5) and round(y_opt, 5) == round(z_opt, 5) + + +@pytest.mark.parametrize("min_count,max_count", [ + (1, 3), + (0, 5), + (1, 1), + (5, 5), +]) +def test_nchoosek_constraint(min_count, max_count): + # standard setting up of problem + problem_config = ProblemConfig(rnd_seed=73) + build_reals_only_problem(problem_config) + rnd_sample = problem_config.get_rnd_sample_list(num_samples=50) + testfunc_evals = eval_reals_only_testfunc(rnd_sample) + + params = {"unc_params": {"dist_metric": "l1", "acq_sense": "penalty"}} + enting = Enting(problem_config, params=params) + # fit tree ensemble + enting.fit(rnd_sample, testfunc_evals) + + model_pyo = problem_config.get_pyomo_model_core() + + # define the constraint + # then immediately apply it to the model + model_pyo.nchoosek = NChooseKConstraint( + 
feature_keys=["x1", "x2", "x3", "x4", "x5"], + min_count=min_count, + max_count=max_count, + none_also_valid=False + ).as_pyomo_constraint(model_pyo, problem_config.feat_list) + + + # optimise the model + params_pyomo = {"solver_name": "gurobi"} + opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo) + res_pyo = opt_pyo.solve(enting, model_core=model_pyo) + + assert min_count <= sum(x > 1e-6 for x in res_pyo.opt_point) <= max_count + From d197117c7d63569bc677bef7e376862a47f0fde4 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 12:11:24 +0100 Subject: [PATCH 06/17] Formatting (black+ruff) --- entmoot/constraints.py | 57 +++++++++++++++++++++++---------- tests/test_constraints_pyomo.py | 46 ++++++++++++++------------ 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/entmoot/constraints.py b/entmoot/constraints.py index a50ae70..8b8d807 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -8,13 +8,15 @@ ConstraintFunctionType = Callable[[pyo.ConcreteModel, int], pyo.Expression] + class Constraint(ABC): """A constraint to be applied to a model. - + Implements a user-friendly way to construct constraints to an optimisation problem. - + Attributes: feature_keys: A list of the string names of the features to be constrained""" + def __init__(self, feature_keys: list[str]): self.feature_keys = feature_keys @@ -26,20 +28,27 @@ def _get_feature_vars( feat_idxs = [all_keys.index(key) for key in self.feature_keys] features = [model._all_feat[i] for i in feat_idxs] return features - + @abstractmethod - def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + def as_pyomo_constraint( + self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] + ) -> pyo.Constraint: """Convert to a pyomo.Constraint object. 
- - This requires the model (to access the variables), and the feat_list (to access the feature names)""" + + This requires the model (to access the variables), and the feat_list (to access the feature names) + """ pass + class ExpressionConstraint(Constraint): """Constraints defined by pyomo.Expressions. - For constraints that can be simply defined by an expression of variables. + For constraints that can be simply defined by an expression of variables. """ - def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + + def as_pyomo_constraint( + self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] + ) -> pyo.Constraint: features = self._get_feature_vars(model, feat_list) return pyo.Constraint(expr=self._get_expr(features)) @@ -50,9 +59,13 @@ def _get_expr(self, features) -> pyo.Expression: class FunctionalConstraint(Constraint): """A constraint that uses a functional approach. - - For constraints that require creating intermediate variables and access to the model.""" - def as_pyomo_constraint(self, model: pyo.ConcreteModel, feat_list: list["FeatureType"]) -> pyo.Constraint: + + For constraints that require creating intermediate variables and access to the model. 
+ """ + + def as_pyomo_constraint( + self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] + ) -> pyo.Constraint: features = self._get_feature_vars(model, feat_list) return pyo.Constraint(rule=self._get_function(model, features)) @@ -87,9 +100,17 @@ def _get_expr(self, features): class NChooseKConstraint(FunctionalConstraint): """Constrain the number of active features to be bounded by min_count and max_count.""" + tol: float = 1e-6 M: float = 1e6 - def __init__(self, feature_keys: list[str], min_count: int, max_count: int, none_also_valid: bool = False): + + def __init__( + self, + feature_keys: list[str], + min_count: int, + max_count: int, + none_also_valid: bool = False, + ): self.min_count = min_count self.max_count = max_count self.none_also_valid = none_also_valid @@ -100,15 +121,17 @@ def _get_function(self, model, features): # where y indicates whether the feature is selected # y * tol <= x <= y * M # tol is sufficiently small, M is sufficiently large - model.feat_selected = pyo.Var(range(len(features)), domain=pyo.Binary, initialize=0) + model.feat_selected = pyo.Var( + range(len(features)), domain=pyo.Binary, initialize=0 + ) model.ub_selected = pyo.ConstraintList() model.lb_selected = pyo.ConstraintList() for i in range(len(features)): - model.ub_selected.add(expr=model.feat_selected[i]*self.M >= features[i]) - model.lb_selected.add(expr=model.feat_selected[i]*self.tol <= features[i]) - + model.ub_selected.add(expr=model.feat_selected[i] * self.M >= features[i]) + model.lb_selected.add(expr=model.feat_selected[i] * self.tol <= features[i]) + def inner(model, i): return sum(model.feat_selected.values()) <= self.max_count - return inner \ No newline at end of file + return inner diff --git a/tests/test_constraints_pyomo.py b/tests/test_constraints_pyomo.py index 82cf953..2eaff5c 100644 --- a/tests/test_constraints_pyomo.py +++ b/tests/test_constraints_pyomo.py @@ -2,10 +2,18 @@ from entmoot.models.enting import Enting from 
entmoot.optimizers.pyomo_opt import PyomoOptimizer -from entmoot.benchmarks import build_reals_only_problem, eval_reals_only_testfunc, build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc -from entmoot.constraints import LinearEqualityConstraint, LinearInequalityConstraint, NChooseKConstraint +from entmoot.benchmarks import ( + build_reals_only_problem, + eval_reals_only_testfunc, + build_multi_obj_categorical_problem, + eval_multi_obj_cat_testfunc, +) +from entmoot.constraints import ( + LinearEqualityConstraint, + NChooseKConstraint, +) import pytest -import numpy as np + def test_linear_equality_constraint(): problem_config = ProblemConfig(rnd_seed=73) @@ -26,18 +34,13 @@ def test_linear_equality_constraint(): # define the constraint # then immediately apply it to the model model_pyo.xy_equal = LinearEqualityConstraint( - feature_keys=["feat_3", "feat_4"], - coefficients=[1, -1], - rhs=0 + feature_keys=["feat_3", "feat_4"], coefficients=[1, -1], rhs=0 ).as_pyomo_constraint(model_pyo, problem_config.feat_list) model_pyo.yz_equal = LinearEqualityConstraint( - feature_keys=["feat_4", "feat_5"], - coefficients=[1, -1], - rhs=0 + feature_keys=["feat_4", "feat_5"], coefficients=[1, -1], rhs=0 ).as_pyomo_constraint(model_pyo, problem_config.feat_list) - # optimise the model params_pyomo = {"solver_name": "gurobi"} opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo) @@ -46,13 +49,16 @@ def test_linear_equality_constraint(): assert round(x_opt, 5) == round(y_opt, 5) and round(y_opt, 5) == round(z_opt, 5) - -@pytest.mark.parametrize("min_count,max_count", [ - (1, 3), - (0, 5), - (1, 1), - (5, 5), -]) + +@pytest.mark.parametrize( + "min_count,max_count", + [ + (1, 3), + (0, 5), + (1, 1), + (5, 5), + ], +) def test_nchoosek_constraint(min_count, max_count): # standard setting up of problem problem_config = ProblemConfig(rnd_seed=73) @@ -70,17 +76,15 @@ def test_nchoosek_constraint(min_count, max_count): # define the constraint # then immediately apply 
it to the model model_pyo.nchoosek = NChooseKConstraint( - feature_keys=["x1", "x2", "x3", "x4", "x5"], + feature_keys=["x1", "x2", "x3", "x4", "x5"], min_count=min_count, max_count=max_count, - none_also_valid=False + none_also_valid=False, ).as_pyomo_constraint(model_pyo, problem_config.feat_list) - # optimise the model params_pyomo = {"solver_name": "gurobi"} opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo) res_pyo = opt_pyo.solve(enting, model_core=model_pyo) assert min_count <= sum(x > 1e-6 for x in res_pyo.opt_point) <= max_count - From bca0d5ab00cc9ff7670c02dabc5b14323d5c9172 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 14:53:10 +0100 Subject: [PATCH 07/17] Add MaxObjective class --- entmoot/models/enting.py | 7 +++++-- entmoot/problem_config.py | 23 +++++++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/entmoot/models/enting.py b/entmoot/models/enting.py index b448904..ba49d89 100644 --- a/entmoot/models/enting.py +++ b/entmoot/models/enting.py @@ -106,7 +106,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> None: "Argument 'y' has wrong dimensions. " f"Expected '(num_samples, {len(self._problem_config.obj_list)})', got '{y.shape}'." ) - + y = self._problem_config.transform_objective(y) self.mean_model.fit(X, y) self.unc_model.fit(X, y) @@ -131,8 +131,11 @@ def predict(self, X: np.ndarray, is_enc=False) -> list: f"Expected '(num_samples, {len(self._problem_config.feat_list)})', got '{X.shape}'." 
) - mean_pred = self.mean_model.predict(X).tolist() + mean_pred = self.mean_model.predict(X) #.tolist() unc_pred = self.unc_model.predict(X) + + mean_pred = self._problem_config.transform_objective(mean_pred) + mean_pred = mean_pred.tolist() comb_pred = [(mean, unc) for mean, unc in zip(mean_pred, unc_pred)] return comb_pred diff --git a/entmoot/problem_config.py b/entmoot/problem_config.py index 2a34b29..d240def 100644 --- a/entmoot/problem_config.py +++ b/entmoot/problem_config.py @@ -167,6 +167,20 @@ def add_min_objective(self, name: str = None): self._obj_list.append(MinObjective(name=name)) + def add_max_objective(self, name: str = None): + if name is None: + name = f"obj_{len(self.obj_list)}" + + self._obj_list.append(MaxObjective(name=name)) + + def transform_objective(self, y: np.ndarray) -> np.ndarray: + """Transform data for minimisation/maximisation""" + # y.shape = (num_samples, num_obj) + signs = np.array([obj.sign for obj in self.obj_list]).reshape(1, -1) + return y * signs + + + def get_rnd_sample_numpy(self, num_samples): # returns np.array for faster processing array_list = [] @@ -455,7 +469,12 @@ def decode(self, xi): def is_bin(self): return True - -class MinObjective: +class Objective: def __init__(self, name): self.name = name + +class MinObjective(Objective): + sign = 1 + +class MaxObjective(Objective): + sign = -1 \ No newline at end of file From fce409860cf5ba8b62893f13bbf78cde4e39b2dd Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 15:44:04 +0100 Subject: [PATCH 08/17] Add tests and example of maximisation --- docs/notebooks/single_obj_maximisation.ipynb | 171 +++++++++++++++++++ tests/test_objectives_pyomo.py | 65 +++++++ 2 files changed, 236 insertions(+) create mode 100644 docs/notebooks/single_obj_maximisation.ipynb create mode 100644 tests/test_objectives_pyomo.py diff --git a/docs/notebooks/single_obj_maximisation.ipynb b/docs/notebooks/single_obj_maximisation.ipynb new file mode 100644 index 0000000..779e30a --- 
/dev/null +++ b/docs/notebooks/single_obj_maximisation.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Objective Maximisation\n", + "\n", + "ENTMOOT supports both minimisation and maximisation of objective functions. This notebook defines a concave function, that has a maximum at (1, 1)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from entmoot import Enting, ProblemConfig, PyomoOptimizer\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# define a maximisation problem\n", + "def eval_simple_max_testfunc(X):\n", + " x = np.array(X)\n", + " y = - np.sum((x - np.ones_like(x)) ** 2, axis=1)\n", + " return y.reshape(-1, 1)\n", + "\n", + "def build_simple_max_problem(problem_config: ProblemConfig):\n", + " problem_config.add_feature(\"real\", (0.0, 2.0), name=\"x1\")\n", + " problem_config.add_feature(\"real\", (0.0, 2.0), name=\"x2\")\n", + " problem_config.add_max_objective()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\users\\tobyb\\phd\\entmoot\\entmoot\\models\\mean_models\\tree_ensemble.py:23: UserWarning: No 'train_params' for tree ensemble training specified. 
Switch training to default params!\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# define problem\n", + "problem_config = ProblemConfig(rnd_seed=73)\n", + "# number of objectives\n", + "build_simple_max_problem(problem_config)\n", + "# sample data\n", + "rnd_sample = problem_config.get_rnd_sample_list(num_samples=200)\n", + "testfunc_evals = eval_simple_max_testfunc(rnd_sample)\n", + "\n", + "params = {\"unc_params\": {\"dist_metric\": \"l1\", \"acq_sense\": \"penalty\"}}\n", + "enting = Enting(problem_config, params=params)\n", + "enting.fit(rnd_sample, testfunc_evals)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set parameter Username\n", + "Academic license - for non-commercial use only - expires 2024-09-06\n", + "Read LP format model from file C:\\Users\\tobyb\\AppData\\Local\\Temp\\tmp89rvgogt.pyomo.lp\n", + "Reading time = 0.02 seconds\n", + "x1: 3828 rows, 2714 columns, 12045 nonzeros\n", + "Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (win64)\n", + "\n", + "CPU model: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz, instruction set [SSE2|AVX|AVX2|AVX512]\n", + "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n", + "\n", + "Optimize a model with 3828 rows, 2714 columns and 12045 nonzeros\n", + "Model fingerprint: 0xb8904322\n", + "Variable types: 1604 continuous, 1110 integer (1110 binary)\n", + "Coefficient statistics:\n", + " Matrix range [2e-06, 2e+00]\n", + " Objective range [1e+00, 2e+00]\n", + " Bounds range [1e+00, 2e+00]\n", + " RHS range [1e-04, 2e+00]\n", + "Presolve removed 435 rows and 419 columns\n", + "Presolve time: 0.08s\n", + "Presolved: 3393 rows, 2295 columns, 10942 nonzeros\n", + "Variable types: 1585 continuous, 710 integer (710 binary)\n", + "Found heuristic solution: objective 1.4333792\n", + "Found heuristic solution: objective 1.3753556\n", + "\n", + "Root relaxation: objective 
3.952765e-02, 617 iterations, 0.00 seconds (0.01 work units)\n", + "\n", + " Nodes | Current Node | Objective Bounds | Work\n", + " Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time\n", + "\n", + " 0 0 0.03953 0 4 1.37536 0.03953 97.1% - 0s\n", + "H 0 0 0.0886752 0.03953 55.4% - 0s\n", + " 0 0 0.03953 0 2 0.08868 0.03953 55.4% - 0s\n", + "H 0 0 0.0395276 0.03953 0.00% - 0s\n", + " 0 0 0.03953 0 2 0.03953 0.03953 0.00% - 0s\n", + "\n", + "Cutting planes:\n", + " Cover: 43\n", + " Implied bound: 3\n", + " Clique: 5\n", + " Flow cover: 7\n", + " Relax-and-lift: 3\n", + "\n", + "Explored 1 nodes (633 simplex iterations) in 0.21 seconds (0.19 work units)\n", + "Thread count was 8 (of 8 available processors)\n", + "\n", + "Solution count 4: 0.0395276 0.0886752 1.37536 1.43338 \n", + "\n", + "Optimal solution found (tolerance 1.00e-04)\n", + "Best objective 3.952764801939e-02, best bound 3.952764801939e-02, gap 0.0000%\n" + ] + }, + { + "data": { + "text/plain": [ + "OptResult(opt_point=[1.1282956329455374, 0.9219237163314549], opt_val=0.039527648019385436, mu_unscaled=[0.039527648019385436], unc_unscaled=0.0, active_leaf_enc=[[(0, '010'), (1, '010'), (2, '011'), (3, '101'), (4, '010'), (5, '101'), (6, '011'), (7, '001'), (8, '011'), (9, '011'), (10, '011'), (11, '011'), (12, '011'), (13, '010'), (14, '010'), (15, '010'), (16, '011'), (17, '011'), (18, '010'), (19, '011'), (20, '011'), (21, '010'), (22, '010'), (23, '010'), (24, '100'), (25, '100'), (26, '101'), (27, '100'), (28, '101'), (29, '101'), (30, '010'), (31, '101'), (32, '010'), (33, '011'), (34, '010'), (35, '010'), (36, '011'), (37, '011'), (38, '010'), (39, '010'), (40, '011'), (41, '001'), (42, '010'), (43, '011'), (44, '010'), (45, '100'), (46, '100'), (47, '101'), (48, '010'), (49, '011'), (50, '010'), (51, '010'), (52, '101'), (53, '010'), (54, '101'), (55, '101'), (56, '101'), (57, '100'), (58, '101'), (59, '010'), (60, '101'), (61, '010'), (62, '101'), (63, '010'), (64, '011'), (65, 
'011'), (66, '010'), (67, '100'), (68, '010'), (69, '101'), (70, '010'), (71, '100'), (72, '100'), (73, '101'), (74, '011'), (75, '010'), (76, '010'), (77, '010'), (78, '100'), (79, '101'), (80, '110'), (81, '101'), (82, '101'), (83, '010'), (84, '101'), (85, '101'), (86, '100'), (87, '110'), (88, '100'), (89, '101'), (90, '100'), (91, '100'), (92, '100'), (93, '101'), (94, '101'), (95, '010'), (96, '101'), (97, '100'), (98, '100'), (99, '101')]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "params_pyomo = {\"solver_name\": \"gurobi\"}\n", + "opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo)\n", + "\n", + "res_pyo = opt_pyo.solve(enting)\n", + "res_pyo" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "enttest", + "language": "python", + "name": "enttest" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_objectives_pyomo.py b/tests/test_objectives_pyomo.py new file mode 100644 index 0000000..70767e1 --- /dev/null +++ b/tests/test_objectives_pyomo.py @@ -0,0 +1,65 @@ +from entmoot import Enting, ProblemConfig, PyomoOptimizer +from entmoot.benchmarks import ( + build_multi_obj_categorical_problem, + eval_multi_obj_cat_testfunc, +) +from pytest import approx + +def test_max_predictions_equal_min_predictions(): + """The sign of the predicted objective is independent of max/min.""" + problem_config = ProblemConfig(rnd_seed=73) + build_multi_obj_categorical_problem(problem_config, n_obj=1) + problem_config.add_min_objective() + + problem_config_max = ProblemConfig(rnd_seed=73) + build_multi_obj_categorical_problem(problem_config_max, n_obj=1) + 
problem_config_max.add_max_objective() + + rnd_sample = problem_config.get_rnd_sample_list(num_samples=20) + testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=2) + + params = {"unc_params": {"dist_metric": "l1", "acq_sense": "exploration"}} + enting = Enting(problem_config, params=params) + enting.fit(rnd_sample, testfunc_evals) + + enting_max = Enting(problem_config_max, params=params) + enting_max.fit(rnd_sample, testfunc_evals) + + sample = problem_config.get_rnd_sample_list(num_samples=3) + pred = enting.predict(sample) + pred_max = enting_max.predict(sample) + + for ((m1, u1), (m2, u2)) in zip(pred, pred_max): + print(">", m1, m2) + assert m1 == approx(m2, rel=1e-5) + assert u1 == approx(u2, rel=1e-5) + +def test_max_objective_equals_minus_min_objective(): + """Assert that the solution found by the minimiser is the same as that of the maximiser for the negative objective function""" + problem_config = ProblemConfig(rnd_seed=73) + build_multi_obj_categorical_problem(problem_config, n_obj=1) + problem_config.add_min_objective() + + problem_config_max = ProblemConfig(rnd_seed=73) + build_multi_obj_categorical_problem(problem_config_max, n_obj=0) + problem_config_max.add_max_objective() + problem_config_max.add_max_objective() + + rnd_sample = problem_config.get_rnd_sample_list(num_samples=20) + testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=2) + + params = {"unc_params": {"dist_metric": "l1", "acq_sense": "penalty"}} + enting = Enting(problem_config, params=params) + enting.fit(rnd_sample, testfunc_evals) + # pass negative test evaluations to the maximiser + enting_max = Enting(problem_config, params=params) + enting_max.fit(rnd_sample, -testfunc_evals) + + params_pyomo = {"solver_name": "gurobi"} + res = PyomoOptimizer(problem_config, params=params_pyomo).solve(enting) + res_max = PyomoOptimizer(problem_config_max, params=params_pyomo).solve(enting) + + assert res.opt_point == approx(res_max.opt_point, rel=1e-5) + + + From 
b258ef8eb22801a446794cffb5902a7a9efb9271 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 16:32:17 +0100 Subject: [PATCH 09/17] Create param dataclasses --- entmoot/utils.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/entmoot/utils.py b/entmoot/utils.py index 4039158..b06b66c 100644 --- a/entmoot/utils.py +++ b/entmoot/utils.py @@ -1,4 +1,6 @@ from collections import namedtuple +from dataclasses import dataclass +from typing import Literal import numpy as np from scipy.special import comb @@ -9,6 +11,37 @@ ) +@dataclass +class EntingParams: + unc_params: "UncParams" = None + tree_train_params: "TreeTrainParams" = None + +@dataclass +class UncParams: + beta: float = 1.96 # >0 + acq_sense: Literal["exploration", "penalty"] = "exploration" + dist_trafo: Literal["normal", "standard"] = "normal" + dist_metric: Literal["euclidean_squared", "l1", "l2"] = "euclidean_squared" + cat_metric: Literal["overlap", "of", "goodall4"] = "overlap" + +@dataclass +class TreeTrainParams: + train_params: "TrainParams" + train_lib: Literal["lgbm"] = "lgbm" + +@dataclass +class TrainParams: + # lightgbm training hyperparameters + objective: str = "regression" + metric: str = "rmse" + boosting: str = "gbdt" + num_boost_round: int = 100 + max_depth: int = 3 + min_data_in_leaf: int = 1 + min_data_per_group: int = 1 + verbose: int = -1 + + def grid(dimension: int, levels: int) -> np.ndarray: """Construct a regular grid on the unit simplex. 
From 1a14dd49afe2873fb94b6db3648aba3a3c1b7daa Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Mon, 18 Sep 2023 16:56:16 +0100 Subject: [PATCH 10/17] EntingParam to/from dictionary --- entmoot/models/enting.py | 10 +++++++++- entmoot/utils.py | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/entmoot/models/enting.py b/entmoot/models/enting.py index b448904..55bdda8 100644 --- a/entmoot/models/enting.py +++ b/entmoot/models/enting.py @@ -4,7 +4,10 @@ from entmoot.models.uncertainty_models.distance_based_uncertainty import ( DistanceBasedUncertainty, ) +from entmoot.utils import EntingParams +from dataclasses import asdict import numpy as np +from typing import Union class Enting(BaseModel): @@ -47,9 +50,14 @@ class Enting(BaseModel): X_opt_pyo, _, _ = opt_pyo.solve(enting) """ - def __init__(self, problem_config: ProblemConfig, params: dict = None): + def __init__(self, problem_config: ProblemConfig, params: Union[EntingParams, dict]): if params is None: params = {} + if isinstance(params, dict): + params = EntingParams.from_dict(params) + + # this is temporary - just to avoid breaking the code! 
+ params = asdict(params) self._problem_config = problem_config diff --git a/entmoot/utils.py b/entmoot/utils.py index b06b66c..66bb05d 100644 --- a/entmoot/utils.py +++ b/entmoot/utils.py @@ -1,5 +1,5 @@ from collections import namedtuple -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Literal import numpy as np from scipy.special import comb @@ -11,10 +11,6 @@ ) -@dataclass -class EntingParams: - unc_params: "UncParams" = None - tree_train_params: "TreeTrainParams" = None @dataclass class UncParams: @@ -24,11 +20,6 @@ class UncParams: dist_metric: Literal["euclidean_squared", "l1", "l2"] = "euclidean_squared" cat_metric: Literal["overlap", "of", "goodall4"] = "overlap" -@dataclass -class TreeTrainParams: - train_params: "TrainParams" - train_lib: Literal["lgbm"] = "lgbm" - @dataclass class TrainParams: # lightgbm training hyperparameters @@ -42,6 +33,34 @@ class TrainParams: verbose: int = -1 +@dataclass +class TreeTrainParams: + train_params: "TrainParams" = field(default_factory=TrainParams) + train_lib: Literal["lgbm"] = "lgbm" + + +@dataclass +class EntingParams: + unc_params: "UncParams" = field(default_factory=UncParams) + tree_train_params: "TreeTrainParams" = field(default_factory=TreeTrainParams) + + @staticmethod + def from_dict(d: dict): + d_unc_params = d.get("unc_params", {}) + d_tree_train_params = d.get("tree_train_params", {}) + d_train_params = d_tree_train_params.get("train_params", {}) + d_tree_train_params = {k: v for k, v in d_tree_train_params.items() if k!="train_params"} + + return EntingParams( + unc_params=UncParams(**d_unc_params), + tree_train_params=TreeTrainParams( + train_params=TrainParams(**d_train_params), + **d_tree_train_params + ) + ) + + + def grid(dimension: int, levels: int) -> np.ndarray: """Construct a regular grid on the unit simplex. 
From 60aa753981946d8bcf5da6292bca36980c386b15 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Wed, 4 Oct 2023 15:23:06 +0100 Subject: [PATCH 11/17] Convert models to use dataclasses --- entmoot/models/enting.py | 18 +++-- entmoot/models/mean_models/tree_ensemble.py | 34 +++------ entmoot/models/model_params.py | 60 +++++++++++++++ .../distance_based_uncertainty.py | 74 +++++++++++-------- entmoot/utils.py | 50 ------------- 5 files changed, 125 insertions(+), 111 deletions(-) create mode 100644 entmoot/models/model_params.py diff --git a/entmoot/models/enting.py b/entmoot/models/enting.py index 55bdda8..c2ce6f2 100644 --- a/entmoot/models/enting.py +++ b/entmoot/models/enting.py @@ -4,7 +4,7 @@ from entmoot.models.uncertainty_models.distance_based_uncertainty import ( DistanceBasedUncertainty, ) -from entmoot.utils import EntingParams +from entmoot.models.model_params import EntingParams from dataclasses import asdict import numpy as np from typing import Union @@ -50,19 +50,20 @@ class Enting(BaseModel): X_opt_pyo, _, _ = opt_pyo.solve(enting) """ - def __init__(self, problem_config: ProblemConfig, params: Union[EntingParams, dict]): + def __init__(self, problem_config: ProblemConfig, params: Union[EntingParams, dict, None]): if params is None: params = {} if isinstance(params, dict): - params = EntingParams.from_dict(params) + params = EntingParams.fromdict(params) # this is temporary - just to avoid breaking the code! 
- params = asdict(params) + # params = asdict(params) self._problem_config = problem_config # check params values - tree_training_params = params.get("tree_train_params", {}) + # tree_training_params = params.get("tree_train_params", {}) + tree_training_params = params.tree_train_params # initialize mean model self.mean_model = TreeEnsemble( @@ -70,14 +71,15 @@ def __init__(self, problem_config: ProblemConfig, params: Union[EntingParams, di ) # initialize unc model - unc_params = params.get("unc_params", {}) - self._acq_sense = unc_params.get("acq_sense", "exploration") + # unc_params = params.get("unc_params", {}) + unc_params = params.unc_params + self._acq_sense = unc_params.acq_sense assert self._acq_sense in ( "exploration", "penalty", ), f"Pick 'acq_sense' '{self._acq_sense}' in '('exploration', 'penalty')'." - self._beta = unc_params.get("beta", 1.96) + self._beta = unc_params.beta assert ( self._beta >= 0.0 ), f"Value for 'beta' is {self._beta} but must be '>= 0.0'." diff --git a/entmoot/models/mean_models/tree_ensemble.py b/entmoot/models/mean_models/tree_ensemble.py index 7eb783c..84ee194 100644 --- a/entmoot/models/mean_models/tree_ensemble.py +++ b/entmoot/models/mean_models/tree_ensemble.py @@ -1,45 +1,31 @@ from entmoot.models.base_model import BaseModel from entmoot.models.mean_models.lgbm_utils import read_lgbm_tree_model_dict from entmoot.models.mean_models.meta_tree_ensemble import MetaTreeModel +from entmoot.models.model_params import TreeTrainParams import warnings +from typing import Union +from dataclasses import asdict import numpy as np class TreeEnsemble(BaseModel): - def __init__(self, problem_config, params=None): + def __init__(self, problem_config, params:Union[TreeTrainParams, dict, None]=None): if params is None: params = {} + if isinstance(params, dict): + params = TreeTrainParams.fromdict(params) self._problem_config = problem_config - self._train_lib = params.get("train_lib", "lgbm") + self._train_lib = params.train_lib 
self._rnd_seed = problem_config.rnd_seed assert self._train_lib in ("lgbm"), ( "Parameter 'train_lib' for tree ensembles needs to be " "in '('lgbm')'." ) - if "train_params" not in params: - # default training params - warnings.warn( - "No 'train_params' for tree ensemble training specified. " - "Switch training to default params!" - ) - - self._train_params = { - "objective": "regression", - "metric": "rmse", - "boosting": "gbdt", - "num_boost_round": 100, - "max_depth": 3, - "min_data_in_leaf": 1, - "min_data_per_group": 1, - "verbose": -1, - } - - if self._rnd_seed is not None: - self._train_params["random_state"] = self._rnd_seed - else: - self._train_params = params["train_params"] + self._train_params = asdict(params.train_params) + if self._rnd_seed is not None: + self._train_params["random_state"] = self._rnd_seed self._tree_dict = None self._meta_tree_dict = {} diff --git a/entmoot/models/model_params.py b/entmoot/models/model_params.py new file mode 100644 index 0000000..5e71782 --- /dev/null +++ b/entmoot/models/model_params.py @@ -0,0 +1,60 @@ +"""Dataclasses containing the parameters for Enting models""" + +from typing import Literal +from dataclasses import dataclass, field + +@dataclass +class UncParams: + beta: float = 1.96 + bound_coeff: float = 0.5 + acq_sense: Literal["exploration", "penalty"] = "exploration" + dist_trafo: Literal["normal", "standard"] = "normal" + dist_metric: Literal["euclidean_squared", "l1", "l2"] = "euclidean_squared" + cat_metric: Literal["overlap", "of", "goodall4"] = "overlap" + + def __post_init__(self): + if self.beta < 0.0: + raise ValueError(f"Value for 'beta' is {self.beta} but must be '>= 0.0'.") + +@dataclass +class TrainParams: + # lightgbm training hyperparameters + objective: str = "regression" + metric: str = "rmse" + boosting: str = "gbdt" + num_boost_round: int = 100 + max_depth: int = 3 + min_data_in_leaf: int = 1 + min_data_per_group: int = 1 + verbose: int = -1 + + +@dataclass +class TreeTrainParams: + 
train_params: "TrainParams" = field(default_factory=TrainParams) + train_lib: Literal["lgbm"] = "lgbm" + + @staticmethod + def fromdict(d: dict): + d_train_params = d.get("train_params", {}) + d_tree_train_params = {k: v for k, v in d.items() if k!="train_params"} + return TreeTrainParams( + train_params=TrainParams(**d_train_params), + **d_tree_train_params + ) + + +@dataclass +class EntingParams: + unc_params: "UncParams" = field(default_factory=UncParams) + tree_train_params: "TreeTrainParams" = field(default_factory=TreeTrainParams) + + @staticmethod + def fromdict(d: dict): + d_unc_params = d.get("unc_params", {}) + d_tree_train_params = d.get("tree_train_params", {}) + + return EntingParams( + unc_params=UncParams(**d_unc_params), + tree_train_params=TreeTrainParams.fromdict(d_tree_train_params) + ) \ No newline at end of file diff --git a/entmoot/models/uncertainty_models/distance_based_uncertainty.py b/entmoot/models/uncertainty_models/distance_based_uncertainty.py index 64c9c0a..db49237 100644 --- a/entmoot/models/uncertainty_models/distance_based_uncertainty.py +++ b/entmoot/models/uncertainty_models/distance_based_uncertainty.py @@ -1,4 +1,5 @@ from entmoot.models.base_model import BaseModel +from entmoot.models.uncertainty_models.base_distance import CatDistance, NonCatDistance from entmoot.models.uncertainty_models.euclidean_squared_distance import ( EuclideanSquaredDistance, ) @@ -9,25 +10,39 @@ from entmoot.models.uncertainty_models.goodall4_distance import Goodall4Distance from entmoot.models.uncertainty_models.of_distance import OfDistance +from entmoot.models.model_params import UncParams +from typing import Union import numpy as np +def distance_func_mapper(dist_name: str, cat: bool) -> Union[CatDistance, NonCatDistance]: + """Given a string, return the distance function""" + non_cat_dists = { + "euclidean_squared": EuclideanSquaredDistance, + "l1": L1Distance, + "l2": L2Distance, + } + cat_dists = { + "overlap": OverlapDistance, + "of": 
OfDistance, + "goodall4": Goodall4Distance, + } + if cat: + return cat_dists.get(dist_name) + else: + return non_cat_dists.get(dist_name) + class DistanceBasedUncertainty(BaseModel): - def __init__(self, problem_config, params): + def __init__(self, problem_config, params: UncParams): self._problem_config = problem_config - dist_metric = params.get("dist_metric", "euclidean_squared") - dist_trafo = params.get("dist_trafo", "normal") - cat_metric = params.get("cat_metric", "overlap") - acq_sense = params.get("acq_sense", "exploration") - self._non_cat_x, self._cat_x = None, None self._dist_bound = None - self._dist_metric = dist_metric + self._dist_metric = params.dist_metric self._num_cache_x = None - self._acq_sense = acq_sense + self._acq_sense = params.acq_sense - if dist_trafo == "standard": + if params.dist_trafo == "standard": assert ( len(self._problem_config.obj_list) == 1 ), "Distance transformation 'standard' can only be used for single objective problems." @@ -37,59 +52,60 @@ def __init__(self, problem_config, params): ), "Distance transformation 'standard' can only be used for non-categorical problems." self._dist_has_var_bound = False if self._acq_sense == "penalty" else True - self._bound_coeff = params.get("bound_coeff", 0.5) + self._bound_coeff = params.bound_coeff self._dist_coeff = 1.0 - elif dist_trafo == "normal": + elif params.dist_trafo == "normal": self._dist_has_var_bound = False self._bound_coeff = None self._dist_coeff = 1 / len(self._problem_config.feat_list) else: raise IOError( - f"Pick 'dist_trafo' '{dist_trafo}' in '('normal', 'standard')'." + f"Pick 'dist_trafo' '{params.dist_trafo}' in '('normal', 'standard')'." 
) # pick distance metric for non-cat features - if dist_metric == "euclidean_squared": + # non_cat_distance = distance_func_mapper(params.dist_metric, cat=False) + if params.dist_metric == "euclidean_squared": self.non_cat_unc_model = EuclideanSquaredDistance( problem_config=self._problem_config, - acq_sense=acq_sense, - dist_trafo=dist_trafo, + acq_sense=params.acq_sense, + dist_trafo=params.dist_trafo, ) - elif dist_metric == "l1": + elif params.dist_metric == "l1": self.non_cat_unc_model = L1Distance( problem_config=self._problem_config, - acq_sense=acq_sense, - dist_trafo=dist_trafo, + acq_sense=params.acq_sense, + dist_trafo=params.dist_trafo, ) - elif dist_metric == "l2": + elif params.dist_metric == "l2": self.non_cat_unc_model = L2Distance( problem_config=self._problem_config, - acq_sense=acq_sense, - dist_trafo=dist_trafo, + acq_sense=params.acq_sense, + dist_trafo=params.dist_trafo, ) else: raise IOError( - f"Non-categorical uncertainty metric '{dist_metric}' for " + f"Non-categorical uncertainty metric '{params.dist_metric}' for " f"{self.__class__.__name__} model is not supported. " f"Check 'params['uncertainty_type']'." 
) # pick distance metric for cat features - if cat_metric == "overlap": + if params.cat_metric == "overlap": self.cat_unc_model = OverlapDistance( - problem_config=self._problem_config, acq_sense=acq_sense + problem_config=self._problem_config, acq_sense=params.acq_sense ) - elif cat_metric == "of": + elif params.cat_metric == "of": self.cat_unc_model = OfDistance( - problem_config=self._problem_config, acq_sense=acq_sense + problem_config=self._problem_config, acq_sense=params.acq_sense ) - elif cat_metric == "goodall4": + elif params.cat_metric == "goodall4": self.cat_unc_model = Goodall4Distance( - problem_config=self._problem_config, acq_sense=acq_sense + problem_config=self._problem_config, acq_sense=params.acq_sense ) else: raise IOError( - f"Categorical uncertainty metric '{cat_metric}' for {self.__class__.__name__} " + f"Categorical uncertainty metric '{params.cat_metric}' for {self.__class__.__name__} " f"model is not supported. Check 'params['uncertainty_type']'." ) diff --git a/entmoot/utils.py b/entmoot/utils.py index 66bb05d..9c360a9 100644 --- a/entmoot/utils.py +++ b/entmoot/utils.py @@ -11,56 +11,6 @@ ) - -@dataclass -class UncParams: - beta: float = 1.96 # >0 - acq_sense: Literal["exploration", "penalty"] = "exploration" - dist_trafo: Literal["normal", "standard"] = "normal" - dist_metric: Literal["euclidean_squared", "l1", "l2"] = "euclidean_squared" - cat_metric: Literal["overlap", "of", "goodall4"] = "overlap" - -@dataclass -class TrainParams: - # lightgbm training hyperparameters - objective: str = "regression" - metric: str = "rmse" - boosting: str = "gbdt" - num_boost_round: int = 100 - max_depth: int = 3 - min_data_in_leaf: int = 1 - min_data_per_group: int = 1 - verbose: int = -1 - - -@dataclass -class TreeTrainParams: - train_params: "TrainParams" = field(default_factory=TrainParams) - train_lib: Literal["lgbm"] = "lgbm" - - -@dataclass -class EntingParams: - unc_params: "UncParams" = field(default_factory=UncParams) - tree_train_params: 
"TreeTrainParams" = field(default_factory=TreeTrainParams) - - @staticmethod - def from_dict(d: dict): - d_unc_params = d.get("unc_params", {}) - d_tree_train_params = d.get("tree_train_params", {}) - d_train_params = d_tree_train_params.get("train_params", {}) - d_tree_train_params = {k: v for k, v in d_tree_train_params.items() if k!="train_params"} - - return EntingParams( - unc_params=UncParams(**d_unc_params), - tree_train_params=TreeTrainParams( - train_params=TrainParams(**d_train_params), - **d_tree_train_params - ) - ) - - - def grid(dimension: int, levels: int) -> np.ndarray: """Construct a regular grid on the unit simplex. From 1d108cd1d9df4f632725cb5835922a0754f73a18 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Wed, 4 Oct 2023 16:15:25 +0100 Subject: [PATCH 12/17] Move data validation to Params, add testing --- entmoot/models/enting.py | 19 +----- entmoot/models/mean_models/tree_ensemble.py | 8 +-- entmoot/models/model_params.py | 53 +++++++++------- .../distance_based_uncertainty.py | 61 ++++++++----------- tests/test_model_params.py | 30 +++++++++ 5 files changed, 89 insertions(+), 82 deletions(-) create mode 100644 tests/test_model_params.py diff --git a/entmoot/models/enting.py b/entmoot/models/enting.py index c2ce6f2..8412a85 100644 --- a/entmoot/models/enting.py +++ b/entmoot/models/enting.py @@ -54,35 +54,20 @@ def __init__(self, problem_config: ProblemConfig, params: Union[EntingParams, di if params is None: params = {} if isinstance(params, dict): - params = EntingParams.fromdict(params) - - # this is temporary - just to avoid breaking the code! 
- # params = asdict(params) + params = EntingParams(**params) self._problem_config = problem_config - # check params values - # tree_training_params = params.get("tree_train_params", {}) - tree_training_params = params.tree_train_params - # initialize mean model self.mean_model = TreeEnsemble( - problem_config=problem_config, params=tree_training_params + problem_config=problem_config, params=params.tree_train_params ) # initialize unc model - # unc_params = params.get("unc_params", {}) unc_params = params.unc_params self._acq_sense = unc_params.acq_sense - assert self._acq_sense in ( - "exploration", - "penalty", - ), f"Pick 'acq_sense' '{self._acq_sense}' in '('exploration', 'penalty')'." self._beta = unc_params.beta - assert ( - self._beta >= 0.0 - ), f"Value for 'beta' is {self._beta} but must be '>= 0.0'." if self._acq_sense == "exploration": self._beta = -self._beta diff --git a/entmoot/models/mean_models/tree_ensemble.py b/entmoot/models/mean_models/tree_ensemble.py index 84ee194..741bc29 100644 --- a/entmoot/models/mean_models/tree_ensemble.py +++ b/entmoot/models/mean_models/tree_ensemble.py @@ -9,20 +9,16 @@ class TreeEnsemble(BaseModel): - def __init__(self, problem_config, params:Union[TreeTrainParams, dict, None]=None): + def __init__(self, problem_config, params: Union[TreeTrainParams, dict, None] = None): if params is None: params = {} if isinstance(params, dict): - params = TreeTrainParams.fromdict(params) + params = TreeTrainParams(**params) self._problem_config = problem_config self._train_lib = params.train_lib self._rnd_seed = problem_config.rnd_seed - assert self._train_lib in ("lgbm"), ( - "Parameter 'train_lib' for tree ensembles needs to be " "in '('lgbm')'." 
- ) - self._train_params = asdict(params.train_params) if self._rnd_seed is not None: self._train_params["random_state"] = self._rnd_seed diff --git a/entmoot/models/model_params.py b/entmoot/models/model_params.py index 5e71782..410c9db 100644 --- a/entmoot/models/model_params.py +++ b/entmoot/models/model_params.py @@ -3,6 +3,10 @@ from typing import Literal from dataclasses import dataclass, field +class ParamValidationError(ValueError): + """A model parameter takes an invalid value.""" + pass + @dataclass class UncParams: beta: float = 1.96 @@ -14,7 +18,15 @@ class UncParams: def __post_init__(self): if self.beta < 0.0: - raise ValueError(f"Value for 'beta' is {self.beta} but must be '>= 0.0'.") + raise ParamValidationError( + f"Value for 'beta' is {self.beta}; must be positive." + ) + + if self.acq_sense not in ("exploration", "penalty"): + raise ParamValidationError( + f"Value for 'acq_sense' is '{self.acq_sense}'; must be in ('exploration', 'penalty')." + ) + @dataclass class TrainParams: @@ -31,30 +43,27 @@ class TrainParams: @dataclass class TreeTrainParams: - train_params: "TrainParams" = field(default_factory=TrainParams) + train_params: "TrainParams" = field(default_factory=dict) train_lib: Literal["lgbm"] = "lgbm" - @staticmethod - def fromdict(d: dict): - d_train_params = d.get("train_params", {}) - d_tree_train_params = {k: v for k, v in d.items() if k!="train_params"} - return TreeTrainParams( - train_params=TrainParams(**d_train_params), - **d_tree_train_params - ) + def __post_init__(self): + self.train_params = TrainParams(**self.train_params) + + if self.train_lib not in ("lgbm",): + raise ParamValidationError( + f"Value for 'train_lib' is {self.train_lib}; must be in ('lgbm',)" + ) @dataclass class EntingParams: - unc_params: "UncParams" = field(default_factory=UncParams) - tree_train_params: "TreeTrainParams" = field(default_factory=TreeTrainParams) - - @staticmethod - def fromdict(d: dict): - d_unc_params = d.get("unc_params", {}) - 
d_tree_train_params = d.get("tree_train_params", {}) - - return EntingParams( - unc_params=UncParams(**d_unc_params), - tree_train_params=TreeTrainParams.fromdict(d_tree_train_params) - ) \ No newline at end of file + """Contains parameters for a mean and uncertainty model. + + Provides a structured dataclass for the parameters of an Enting model, + alongside default values and some light data validation.""" + unc_params: "UncParams" = field(default_factory=dict) + tree_train_params: "TreeTrainParams" = field(default_factory=dict) + + def __post_init__(self): + self.unc_params = UncParams(**self.unc_params) + self.tree_train_params = TreeTrainParams(**self.tree_train_params) \ No newline at end of file diff --git a/entmoot/models/uncertainty_models/distance_based_uncertainty.py b/entmoot/models/uncertainty_models/distance_based_uncertainty.py index db49237..f68308b 100644 --- a/entmoot/models/uncertainty_models/distance_based_uncertainty.py +++ b/entmoot/models/uncertainty_models/distance_based_uncertainty.py @@ -10,7 +10,7 @@ from entmoot.models.uncertainty_models.goodall4_distance import Goodall4Distance from entmoot.models.uncertainty_models.of_distance import OfDistance -from entmoot.models.model_params import UncParams +from entmoot.models.model_params import UncParams, ParamValidationError from typing import Union import numpy as np @@ -33,7 +33,12 @@ def distance_func_mapper(dist_name: str, cat: bool) -> Union[CatDistance, NonCat class DistanceBasedUncertainty(BaseModel): - def __init__(self, problem_config, params: UncParams): + def __init__(self, problem_config, params: Union[UncParams, dict, None] = None): + if params is None: + params = {} + if isinstance(params, dict): + params = UncParams(**params) + self._problem_config = problem_config self._non_cat_x, self._cat_x = None, None @@ -59,54 +64,36 @@ def __init__(self, problem_config, params: UncParams): self._bound_coeff = None self._dist_coeff = 1 / len(self._problem_config.feat_list) else: - raise 
IOError( + raise ParamValidationError( f"Pick 'dist_trafo' '{params.dist_trafo}' in '('normal', 'standard')'." ) # pick distance metric for non-cat features - # non_cat_distance = distance_func_mapper(params.dist_metric, cat=False) - if params.dist_metric == "euclidean_squared": - self.non_cat_unc_model = EuclideanSquaredDistance( - problem_config=self._problem_config, - acq_sense=params.acq_sense, - dist_trafo=params.dist_trafo, - ) - elif params.dist_metric == "l1": - self.non_cat_unc_model = L1Distance( - problem_config=self._problem_config, - acq_sense=params.acq_sense, - dist_trafo=params.dist_trafo, + non_cat_distance = distance_func_mapper(params.dist_metric, cat=False) + if non_cat_distance is None: + raise ParamValidationError( + f"Non-categorical uncertainty metric '{params.dist_metric}' for " + f"{self.__class__.__name__} model is not supported. " + f"Check 'params['dist_metric']'." ) - elif params.dist_metric == "l2": - self.non_cat_unc_model = L2Distance( + else: + self.non_cat_unc_model: NonCatDistance = non_cat_distance( problem_config=self._problem_config, acq_sense=params.acq_sense, dist_trafo=params.dist_trafo, ) - else: - raise IOError( - f"Non-categorical uncertainty metric '{params.dist_metric}' for " - f"{self.__class__.__name__} model is not supported. " - f"Check 'params['uncertainty_type']'." 
- ) # pick distance metric for cat features - if params.cat_metric == "overlap": - self.cat_unc_model = OverlapDistance( - problem_config=self._problem_config, acq_sense=params.acq_sense - ) - elif params.cat_metric == "of": - self.cat_unc_model = OfDistance( - problem_config=self._problem_config, acq_sense=params.acq_sense - ) - elif params.cat_metric == "goodall4": - self.cat_unc_model = Goodall4Distance( - problem_config=self._problem_config, acq_sense=params.acq_sense + cat_distance = distance_func_mapper(params.cat_metric, cat=True) + if cat_distance is None: + raise ParamValidationError( + f"Categorical uncertainty metric '{params.cat_metric}' for {self.__class__.__name__} " + f"model is not supported. Check 'params['cat_metric']'." ) else: - raise IOError( - f"Categorical uncertainty metric '{params.cat_metric}' for {self.__class__.__name__} " - f"model is not supported. Check 'params['uncertainty_type']'." + self.cat_unc_model: CatDistance = cat_distance( + problem_config=self._problem_config, + acq_sense=params.acq_sense, ) @property diff --git a/tests/test_model_params.py b/tests/test_model_params.py new file mode 100644 index 0000000..c8e5b96 --- /dev/null +++ b/tests/test_model_params.py @@ -0,0 +1,30 @@ +from entmoot.models.model_params import EntingParams, ParamValidationError +import pytest + +def test_model_params_creation(): + """Check EntingParams is instantiated correctly, and check default values.""" + params = EntingParams(**{ + "unc_params": {"beta": 2}, + "tree_train_params" : { + "train_params": {"max_depth": 5} + } + }) + + assert params.unc_params.beta == 2 + assert params.tree_train_params.train_params.max_depth == 5 + + # check a selection of defaults + assert params.unc_params.acq_sense in ("exploration", "penalty") + assert params.tree_train_params.train_params.min_data_in_leaf == 1 + + +def test_model_params_invalid_values(): + """Check EntingParams raises an error for invalid values.""" + with pytest.raises(ParamValidationError): + _ 
= EntingParams(**{"unc_params": {"beta": -1}}) + + with pytest.raises(ParamValidationError): + _ = EntingParams(**{"tree_train_params": {"train_lib": "notimplementedlib"}}) + + with pytest.raises(ParamValidationError): + _ = EntingParams(**{"unc_params": {"acq_sense": "notimplementedsense"}}) \ No newline at end of file From 148925d6ff359b6347a7e1d5de488b78d94f644c Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Tue, 10 Oct 2023 12:02:54 +0100 Subject: [PATCH 13/17] Update consistency tests to EntingParams --- docs/notebooks/multi_obj_pareto.ipynb | 31 +++++- entmoot/__init__.py | 1 + entmoot/models/model_params.py | 10 +- tests/test_consistency_gurobi.py | 126 +++++++++++++----------- tests/test_consistency_pyomo.py | 132 ++++++++++++++------------ tests/test_model_params.py | 11 ++- 6 files changed, 183 insertions(+), 128 deletions(-) diff --git a/docs/notebooks/multi_obj_pareto.ipynb b/docs/notebooks/multi_obj_pareto.ipynb index 07996a4..424521f 100644 --- a/docs/notebooks/multi_obj_pareto.ipynb +++ b/docs/notebooks/multi_obj_pareto.ipynb @@ -45,10 +45,31 @@ "\n", "# sample data\n", "rnd_sample = problem_config.get_rnd_sample_list(num_samples=20)\n", - "testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=number_objectives)\n", + "testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=number_objectives)" + ] + }, + { + "cell_type": "markdown", + "id": "180e8e4b", + "metadata": {}, + "source": [ + "Model parameters can be defined in a few ways:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63286741", + "metadata": {}, + "outputs": [], + "source": [ + "from entmoot import EntingParams, UncParams, TrainParams, TreeTrainParams\n", "\n", - " \n", + "# all three of the below `params` are valid arguments for Enting\n", "params = {\"unc_params\": {\"dist_metric\": \"l1\", \"acq_sense\": \"exploration\"}}\n", + "params = EntingParams(**params)\n", + "params = EntingParams(unc_params=UncParams(dist_metric=\"l1\", 
acq_sense=\"exploration\"))\n", + "\n", "enting = Enting(problem_config, params=params)\n", "# fit tree ensemble\n", "enting.fit(rnd_sample, testfunc_evals)" @@ -101,9 +122,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "enttest", "language": "python", - "name": "python3" + "name": "enttest" }, "language_info": { "codemirror_mode": { @@ -115,7 +136,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.9" } }, "nbformat": 4, diff --git a/entmoot/__init__.py b/entmoot/__init__.py index c443348..df6dde7 100644 --- a/entmoot/__init__.py +++ b/entmoot/__init__.py @@ -2,3 +2,4 @@ from entmoot.models.enting import Enting from entmoot.optimizers.gurobi_opt import GurobiOptimizer from entmoot.optimizers.pyomo_opt import PyomoOptimizer +from entmoot.models.model_params import EntingParams, UncParams, TreeTrainParams, TrainParams \ No newline at end of file diff --git a/entmoot/models/model_params.py b/entmoot/models/model_params.py index 410c9db..7b258e7 100644 --- a/entmoot/models/model_params.py +++ b/entmoot/models/model_params.py @@ -47,7 +47,8 @@ class TreeTrainParams: train_lib: Literal["lgbm"] = "lgbm" def __post_init__(self): - self.train_params = TrainParams(**self.train_params) + if isinstance(self.train_params, dict): + self.train_params = TrainParams(**self.train_params) if self.train_lib not in ("lgbm",): raise ParamValidationError( @@ -65,5 +66,8 @@ class EntingParams: tree_train_params: "TreeTrainParams" = field(default_factory=dict) def __post_init__(self): - self.unc_params = UncParams(**self.unc_params) - self.tree_train_params = TreeTrainParams(**self.tree_train_params) \ No newline at end of file + if isinstance(self.unc_params, dict): + self.unc_params = UncParams(**self.unc_params) + + if isinstance(self.tree_train_params, dict): + self.tree_train_params = TreeTrainParams(**self.tree_train_params) \ No newline at end of file diff 
--git a/tests/test_consistency_gurobi.py b/tests/test_consistency_gurobi.py index e4c2282..f23b949 100644 --- a/tests/test_consistency_gurobi.py +++ b/tests/test_consistency_gurobi.py @@ -4,6 +4,7 @@ import math from entmoot import Enting, ProblemConfig, GurobiOptimizer +from entmoot.models.model_params import EntingParams, UncParams, TreeTrainParams, TrainParams from entmoot.benchmarks import ( build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc, @@ -69,14 +70,14 @@ def run_gurobi(rnd_seed, n_obj, params, params_opt, num_samples=20, no_cat=False @pytest.mark.parametrize("n_obj", [1, 2]) def test_gurobi_consistency1(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - } + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) params_opt = {"LogToConsole": 1, "MIPGap": 0} run_gurobi(rnd_seed, n_obj, params, params_opt, num_samples=200) @@ -88,15 +89,15 @@ def test_gurobi_consistency1(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric @pytest.mark.parametrize("n_obj", [1, 2]) def test_gurobi_consistency2(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - }, - } - params["unc_params"]["beta"] = 0.05 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) + params.unc_params.beta = 0.05 params_opt = {"LogToConsole": 1, "MIPGap": 0} run_gurobi(rnd_seed, n_obj, params, params_opt, num_samples=300) @@ -109,26 +110,31 @@ def test_gurobi_consistency2(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric @pytest.mark.parametrize("n_obj", 
[1, 2]) def test_gurobi_consistency3(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - - # make tree model smaller to reduce testing time - params["tree_train_params"] = { - "objective": "regression", - "metric": "rmse", - "boosting": "gbdt", - "num_boost_round": 2, - "max_depth": 2, - "min_data_in_leaf": 1, - "min_data_per_group": 1, - "verbose": -1, - } - params["unc_params"]["beta"] = 0.05 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric + ), + + # make tree model smaller to reduce testing time + tree_train_params=TreeTrainParams( + train_lib="lgbm", + train_params=TrainParams( + objective="regression", + metric="rmse", + boosting="gbdt", + num_boost_round=2, + max_depth=2, + min_data_in_leaf=1, + min_data_per_group=1, + verbose=-1 + ) + ) + ) + + params.unc_params.beta = 0.05 params_opt = {"LogToConsole": 1} if n_obj == 1: @@ -144,13 +150,15 @@ def test_gurobi_consistency3(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric @pytest.mark.parametrize("acq_sense", ["exploration"]) @pytest.mark.parametrize("rnd_seed", [100, 101, 102]) def test_gurobi_consistency4(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.1 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + + params.unc_params.beta = 0.1 params_opt = {"LogToConsole": 1, "MIPGap": 1e-5} run_gurobi(rnd_seed, 1, params, params_opt, num_samples=20, no_cat=True) @@ -160,13 +168,14 @@ def test_gurobi_consistency4(rnd_seed, acq_sense, dist_metric): @pytest.mark.parametrize("acq_sense", ["penalty"]) 
@pytest.mark.parametrize("rnd_seed", [100, 101, 102]) def test_gurobi_consistency5(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.1 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + params.unc_params.beta = 0.1 params_opt = {"LogToConsole": 1, "MIPGap": 1e-5} run_gurobi(rnd_seed, 1, params, params_opt, num_samples=200, no_cat=True) @@ -176,13 +185,14 @@ def test_gurobi_consistency5(rnd_seed, acq_sense, dist_metric): @pytest.mark.parametrize("acq_sense", ["penalty"]) @pytest.mark.parametrize("rnd_seed", [100, 101, 102]) def test_gurobi_consistency6(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.05 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + params.unc_params.beta = 0.05 params_opt = {"LogToConsole": 1, "MIPGapAbs": 0.01} run_gurobi(rnd_seed, 1, params, params_opt, num_samples=200, no_cat=True) diff --git a/tests/test_consistency_pyomo.py b/tests/test_consistency_pyomo.py index 3e7e4ec..a69c11a 100644 --- a/tests/test_consistency_pyomo.py +++ b/tests/test_consistency_pyomo.py @@ -4,6 +4,7 @@ import math from entmoot import Enting, ProblemConfig, PyomoOptimizer +from entmoot.models.model_params import EntingParams, UncParams, TreeTrainParams, TrainParams from entmoot.benchmarks import ( build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc, @@ -71,14 +72,14 @@ def run_pyomo(rnd_seed, n_obj, params, params_opt, num_samples=20, no_cat=False) @pytest.mark.parametrize("n_obj", [1, 2]) def test_pyomo_consistency1(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - 
params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - } + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) params_opt = { "solver_name": "gurobi", "solver_options": {"NonConvex": 2, "MIPGap": 0}, @@ -93,15 +94,15 @@ def test_pyomo_consistency1(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric) @pytest.mark.parametrize("n_obj", [1, 2]) def test_gurobi_consistency2(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - }, - } - params["unc_params"]["beta"] = 0.05 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) + params.unc_params.beta = 0.05 params_opt = { "solver_name": "gurobi", "solver_options": {"NonConvex": 2, "MIPGap": 0}, @@ -117,26 +118,31 @@ def test_gurobi_consistency2(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric @pytest.mark.parametrize("n_obj", [1, 2]) def test_pyomo_consistency3(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric): # define model params - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - - # make tree model smaller to reduce testing time - params["tree_train_params"] = { - "objective": "regression", - "metric": "rmse", - "boosting": "gbdt", - "num_boost_round": 2, - "max_depth": 2, - "min_data_in_leaf": 1, - "min_data_per_group": 1, - "verbose": -1, - } - params["unc_params"]["beta"] = 0.05 + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric + ), + + # make tree model smaller 
to reduce testing time + tree_train_params=TreeTrainParams( + train_lib="lgbm", + train_params=TrainParams( + objective="regression", + metric="rmse", + boosting="gbdt", + num_boost_round=2, + max_depth=2, + min_data_in_leaf=1, + min_data_per_group=1, + verbose=-1 + ) + ) + ) + + params.unc_params.beta = 0.05 params_opt = { "solver_name": "gurobi", "solver_options": {"MIPGap": 0, "LogToConsole": 1, "NonConvex": 2}, @@ -154,14 +160,16 @@ def test_pyomo_consistency3(rnd_seed, n_obj, acq_sense, dist_metric, cat_metric) @pytest.mark.parametrize("dist_metric", ["l1", "l2", "euclidean_squared"]) @pytest.mark.parametrize("acq_sense", ["exploration"]) @pytest.mark.parametrize("rnd_seed", [100, 101, 102]) -def test_gurobi_consistency4(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.1 +def test_pyomo_consistency4(rnd_seed, acq_sense, dist_metric): + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + + params.unc_params.beta = 0.1 params_opt = { "solver_name": "gurobi", "solver_options": {"NonConvex": 2, "MIPGap": 1e-5}, @@ -173,14 +181,15 @@ def test_gurobi_consistency4(rnd_seed, acq_sense, dist_metric): @pytest.mark.parametrize("dist_metric", ["l1", "euclidean_squared"]) @pytest.mark.parametrize("acq_sense", ["penalty"]) @pytest.mark.parametrize("rnd_seed", [100, 101, 102]) -def test_gurobi_consistency5(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.1 +def test_pyomo_consistency5(rnd_seed, acq_sense, dist_metric): + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + params.unc_params.beta = 0.1 params_opt = { 
"solver_name": "gurobi", "solver_options": {"NonConvex": 2, "MIPGap": 1e-5}, @@ -192,14 +201,15 @@ def test_gurobi_consistency5(rnd_seed, acq_sense, dist_metric): @pytest.mark.parametrize("dist_metric", ["l2"]) @pytest.mark.parametrize("acq_sense", ["penalty"]) @pytest.mark.parametrize("rnd_seed", [100, 101, 102]) -def test_gurobi_consistency6(rnd_seed, acq_sense, dist_metric): - params = {} - params["unc_params"] = { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "standard", - } - params["unc_params"]["beta"] = 0.05 +def test_pyomo_consistency6(rnd_seed, acq_sense, dist_metric): + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="standard", + ) + ) + params.unc_params.beta = 0.05 params_opt = { "solver_name": "gurobi", "solver_options": {"NonConvex": 2, "MIPGap": 1e-5}, diff --git a/tests/test_model_params.py b/tests/test_model_params.py index c8e5b96..e30ab5f 100644 --- a/tests/test_model_params.py +++ b/tests/test_model_params.py @@ -1,4 +1,4 @@ -from entmoot.models.model_params import EntingParams, ParamValidationError +from entmoot.models.model_params import EntingParams, UncParams, TrainParams, TreeTrainParams, ParamValidationError import pytest def test_model_params_creation(): @@ -17,6 +17,15 @@ def test_model_params_creation(): assert params.unc_params.acq_sense in ("exploration", "penalty") assert params.tree_train_params.train_params.min_data_in_leaf == 1 + # check alternate initialisation method + params_other = EntingParams( + unc_params=UncParams(beta=2), + tree_train_params=TreeTrainParams( + train_params=TrainParams(max_depth=5) + ) + ) + assert params == params_other + def test_model_params_invalid_values(): """Check EntingParams raises an error for invalid values.""" From cdd8b9ac43a7d001acb1756f3079f54153c71f6e Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Tue, 10 Oct 2023 12:18:30 +0100 Subject: [PATCH 14/17] Update remaining tests to EntingParams --- 
tests/test_curr.py | 21 +++++++++++++++++---- tests/test_optimality_gurobi.py | 17 +++++++++-------- tests/test_optimality_pyomo.py | 17 +++++++++-------- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tests/test_curr.py b/tests/test_curr.py index f80416c..1ae34cb 100644 --- a/tests/test_curr.py +++ b/tests/test_curr.py @@ -1,6 +1,7 @@ import math from entmoot import Enting, ProblemConfig, GurobiOptimizer, PyomoOptimizer +from entmoot.models.model_params import EntingParams, UncParams from entmoot.benchmarks import ( build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc, @@ -42,7 +43,10 @@ def test_multiobj_constraints(): rnd_sample = problem_config.get_rnd_sample_list(num_samples=20) testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=number_objectives) - params = {"unc_params": {"dist_metric": "l1", "acq_sense": "exploration"}} + params = EntingParams(unc_params=UncParams( + dist_metric="l1", + acq_sense="exploration" + )) enting = Enting(problem_config, params=params) # fit tree ensemble enting.fit(rnd_sample, testfunc_evals) @@ -107,7 +111,10 @@ def my_func(x: float) -> float: y_train = np.reshape([my_func(x) for x in X_train], (-1, 1)) # Define enting object and corresponding parameters - params = {"unc_params": {"dist_metric": "l1", "acq_sense": "penalty"}} + params = EntingParams(unc_params=UncParams( + dist_metric="l1", + acq_sense="exploration" + )) enting = Enting(problem_config, params=params) # Fit tree model enting.fit(X_train, y_train) @@ -138,7 +145,10 @@ def test_compare_pyomo_gurobipy_multiobj(): for metric in ["l1", "l2", "euclidean_squared"]: for acq_sense in ["exploration", "penalty"]: - params = {"unc_params": {"dist_metric": metric, "acq_sense": acq_sense}} + params = EntingParams(unc_params=UncParams( + dist_metric=metric, + acq_sense=acq_sense + )) enting = Enting(problem_config, params=params) # fit tree ensemble enting.fit(rnd_sample, testfunc_evals) @@ -179,7 +189,10 @@ def 
test_compare_pyomo_gurobipy_singleobj(): for metric in ["l1", "l2", "euclidean_squared"]: for acq_sense in ["exploration", "penalty"]: - params = {"unc_params": {"dist_metric": metric, "acq_sense": acq_sense}} + params = EntingParams(unc_params=UncParams( + dist_metric=metric, + acq_sense=acq_sense + )) enting = Enting(problem_config, params=params) # fit tree ensemble enting.fit(rnd_sample, testfunc_evals) diff --git a/tests/test_optimality_gurobi.py b/tests/test_optimality_gurobi.py index cfa3dee..234ef66 100644 --- a/tests/test_optimality_gurobi.py +++ b/tests/test_optimality_gurobi.py @@ -4,6 +4,7 @@ import numpy as np from entmoot import Enting, ProblemConfig, GurobiOptimizer +from entmoot.models.model_params import EntingParams, UncParams from entmoot.benchmarks import ( build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc, @@ -91,13 +92,13 @@ def run_gurobi( @pytest.mark.parametrize("rnd_seed", [100, 101]) def test_gurobi_optimality(rnd_seed, acq_sense, dist_metric, cat_metric): # define model params - params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - } + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) params_opt = {"LogToConsole": 1, "MIPGap": 0} run_gurobi(rnd_seed, params, params_opt, num_samples=20) diff --git a/tests/test_optimality_pyomo.py b/tests/test_optimality_pyomo.py index eacb63f..d08c8be 100644 --- a/tests/test_optimality_pyomo.py +++ b/tests/test_optimality_pyomo.py @@ -4,6 +4,7 @@ import numpy as np from entmoot import Enting, ProblemConfig, PyomoOptimizer +from entmoot.models.model_params import EntingParams, UncParams from entmoot.benchmarks import ( build_multi_obj_categorical_problem, eval_multi_obj_cat_testfunc, @@ -91,14 +92,14 @@ def run_pyomo( @pytest.mark.parametrize("rnd_seed", [100, 101]) def test_pyomo_optimality(rnd_seed, 
acq_sense, dist_metric, cat_metric): # define model params - params = { - "unc_params": { - "dist_metric": dist_metric, - "acq_sense": acq_sense, - "dist_trafo": "normal", - "cat_metric": cat_metric, - } - } + params = EntingParams( + unc_params=UncParams( + dist_metric=dist_metric, + acq_sense=acq_sense, + dist_trafo="normal", + cat_metric=cat_metric, + ) + ) params_opt = { "solver_name": "gurobi", "solver_options": {"MIPGap": 0, "LogToConsole": 1, "NonConvex": 2}, From 122bf708550c8a7d5e92b089cf2b314ae93d7739 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Fri, 20 Oct 2023 13:51:24 +0100 Subject: [PATCH 15/17] Create ConstraintList for multiple constraints --- docs/notebooks/constraint_classes.ipynb | 160 ++++++++++++++++++++++-- entmoot/constraints.py | 39 +++++- 2 files changed, 187 insertions(+), 12 deletions(-) diff --git a/docs/notebooks/constraint_classes.ipynb b/docs/notebooks/constraint_classes.ipynb index 8e50330..e38e740 100644 --- a/docs/notebooks/constraint_classes.ipynb +++ b/docs/notebooks/constraint_classes.ipynb @@ -35,16 +35,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\users\\tobyb\\phd\\entmoot\\entmoot\\models\\mean_models\\tree_ensemble.py:23: UserWarning: No 'train_params' for tree ensemble training specified. 
Switch training to default params!\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from entmoot.benchmarks import build_reals_only_problem, eval_reals_only_testfunc\n", "\n", @@ -189,10 +180,157 @@ "from entmoot.constraints import ExpressionConstraint\n", "\n", "class SumLessThanTen(ExpressionConstraint):\n", - " \"\"\"A constraint that enforces all features to be equal.\"\"\"\n", + " \"\"\"A constraint that enforces selected features to sum to less than ten.\"\"\"\n", " def _get_expr(self, features):\n", " return sum(features) <= 10" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constraint Lists\n", + "\n", + "For a problem definition, it may be easier to define a set of constraints." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "problem_config = ProblemConfig(rnd_seed=73)\n", + "build_reals_only_problem(problem_config)\n", + "rnd_sample = problem_config.get_rnd_sample_list(num_samples=50)\n", + "testfunc_evals = eval_reals_only_testfunc(rnd_sample)\n", + "\n", + "params = {\"unc_params\": {\"dist_metric\": \"l1\", \"acq_sense\": \"penalty\"}}\n", + "enting = Enting(problem_config, params=params)\n", + "# fit tree ensemble\n", + "enting.fit(rnd_sample, testfunc_evals)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from entmoot.constraints import LinearInequalityConstraint, ConstraintList\n", + "import pyomo.environ as pyo\n", + "model_pyo = problem_config.get_pyomo_model_core()\n", + "\n", + "# define the constraint\n", + "# then immediately apply it to the model\n", + "constraints = [\n", + " NChooseKConstraint(\n", + " feature_keys=[\"x1\", \"x2\", \"x3\", \"x4\", \"x5\"], \n", + " min_count=1,\n", + " max_count=4,\n", + " none_also_valid=True\n", + " ),\n", + " LinearInequalityConstraint(\n", + " feature_keys=[\"x3\", \"x4\", \"x5\"],\n", + " coefficients=[1, 1, 1],\n", + " 
rhs=12.0\n", + " )\n", + "]\n", + "\n", + "model_pyo.problem_constraints = pyo.ConstraintList()\n", + "ConstraintList(constraints).apply_pyomo_constraints(\n", + " model_pyo, problem_config.feat_list, model_pyo.problem_constraints\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set parameter Username\n", + "Academic license - for non-commercial use only - expires 2024-09-06\n", + "Read LP format model from file C:\\Users\\tobyb\\AppData\\Local\\Temp\\tmp0tfjrq6i.pyomo.lp\n", + "Reading time = 0.01 seconds\n", + "x1: 2775 rows, 1913 columns, 9133 nonzeros\n", + "Gurobi Optimizer version 10.0.2 build v10.0.2rc0 (win64)\n", + "\n", + "CPU model: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz, instruction set [SSE2|AVX|AVX2|AVX512]\n", + "Thread count: 4 physical cores, 8 logical processors, using up to 8 threads\n", + "\n", + "Optimize a model with 2775 rows, 1913 columns and 9133 nonzeros\n", + "Model fingerprint: 0x8879e895\n", + "Variable types: 1292 continuous, 621 integer (621 binary)\n", + "Coefficient statistics:\n", + " Matrix range [1e-06, 1e+06]\n", + " Objective range [1e+00, 2e+00]\n", + " Bounds range [1e+00, 5e+00]\n", + " RHS range [1e-04, 1e+01]\n", + "Presolve removed 273 rows and 260 columns\n", + "Presolve time: 0.05s\n", + "Presolved: 2502 rows, 1653 columns, 8084 nonzeros\n", + "Variable types: 1282 continuous, 371 integer (371 binary)\n", + "Found heuristic solution: objective 10.1607516\n", + "\n", + "Root relaxation: objective 2.501750e+00, 463 iterations, 0.00 seconds (0.00 work units)\n", + "\n", + " Nodes | Current Node | Objective Bounds | Work\n", + " Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time\n", + "\n", + " 0 0 2.50175 0 11 10.16075 2.50175 75.4% - 0s\n", + "H 0 0 10.1529199 2.50175 75.4% - 0s\n", + "H 0 0 2.8865055 2.50175 13.3% - 0s\n", + " 0 0 2.60484 0 3 2.88651 2.60484 9.76% - 0s\n", + 
" 0 0 2.60484 0 6 2.88651 2.60484 9.76% - 0s\n", + " 0 0 2.60484 0 5 2.88651 2.60484 9.76% - 0s\n", + "H 0 0 2.8764756 2.60484 9.44% - 0s\n", + " 0 0 2.69557 0 11 2.87648 2.69557 6.29% - 0s\n", + "H 0 0 2.8510016 2.69557 5.45% - 0s\n", + "* 0 0 0 2.8510016 2.85100 0.00% - 0s\n", + "\n", + "Cutting planes:\n", + " Cover: 1\n", + " Clique: 5\n", + " RLT: 1\n", + " Relax-and-lift: 6\n", + "\n", + "Explored 1 nodes (786 simplex iterations) in 0.16 seconds (0.12 work units)\n", + "Thread count was 8 (of 8 available processors)\n", + "\n", + "Solution count 5: 2.851 2.87648 2.88651 ... 10.1608\n", + "\n", + "Optimal solution found (tolerance 1.00e-04)\n", + "Best objective 2.851001621749e+00, best bound 2.851001621749e+00, gap 0.0000%\n" + ] + } + ], + "source": [ + "# optimise the model\n", + "params_pyomo = {\"solver_name\": \"gurobi\"}\n", + "opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo)\n", + "res_pyo = opt_pyo.solve(enting, model_core=model_pyo)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.0, 2.43082, 3.2917799999999997, 4.888964463647816, 3.6007]\n" + ] + } + ], + "source": [ + "print(res_pyo.opt_point)" + ] } ], "metadata": { diff --git a/entmoot/constraints.py b/entmoot/constraints.py index 8b8d807..d437749 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -3,6 +3,8 @@ import pyomo.environ as pyo +from entmoot.problem_config import FeatureType + if TYPE_CHECKING: from problem_config import FeatureType @@ -40,6 +42,39 @@ def as_pyomo_constraint( pass +class ConstraintList: + """Contains multiple constraints to be applied at once.""" + + def __init__(self, constraints: list[Constraint]): + self._constraints = constraints + + def add(self, constraint: Constraint): + self._constraints.append(constraint) + + def apply_pyomo_constraints( + self, + model: pyo.ConcreteModel, + feat_list: list[FeatureType], + 
pyo_constraint_list: pyo.ConstraintList, + ) -> None: + """Add constraints to a pyo.ConstraintList object. + + Requires creation of the pyo.ConstraintList outside of this class, + to allow the user to specify the constraint list's name.""" + + for constraint in self._constraints: + features = constraint._get_feature_vars(model, feat_list) + if isinstance(constraint, ExpressionConstraint): + expr = constraint._get_expr(features) + + elif isinstance(constraint, FunctionalConstraint): + # must convert rules to expr + rule = constraint._get_function(model, features) + expr = rule(model, 0) + + pyo_constraint_list.add(expr) + + class ExpressionConstraint(Constraint): """Constraints defined by pyomo.Expressions. @@ -70,7 +105,9 @@ def as_pyomo_constraint( return pyo.Constraint(rule=self._get_function(model, features)) @abstractmethod - def _get_function(self, features) -> ConstraintFunctionType: + def _get_function( + self, model: pyo.ConcreteModel, features: list["FeatureType"] + ) -> ConstraintFunctionType: pass From c98e2f959ed9492d124f06928e8a40cc4cf42ebe Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Fri, 20 Oct 2023 14:01:12 +0100 Subject: [PATCH 16/17] Add test for ConstraintList --- tests/test_constraints_pyomo.py | 56 ++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/tests/test_constraints_pyomo.py b/tests/test_constraints_pyomo.py index 2eaff5c..8a5b767 100644 --- a/tests/test_constraints_pyomo.py +++ b/tests/test_constraints_pyomo.py @@ -1,6 +1,10 @@ from entmoot.problem_config import ProblemConfig from entmoot.models.enting import Enting from entmoot.optimizers.pyomo_opt import PyomoOptimizer +from entmoot.models.model_params import EntingParams, UncParams +from entmoot.constraints import LinearInequalityConstraint, ConstraintList +import pyomo.environ as pyo + from entmoot.benchmarks import ( build_reals_only_problem, @@ -14,6 +18,9 @@ ) import pytest +PARAMS = EntingParams( + unc_params=UncParams(dist_metric="l1", 
acq_sense="exploration") +) def test_linear_equality_constraint(): problem_config = ProblemConfig(rnd_seed=73) @@ -25,8 +32,7 @@ def test_linear_equality_constraint(): rnd_sample = problem_config.get_rnd_sample_list(num_samples=20) testfunc_evals = eval_multi_obj_cat_testfunc(rnd_sample, n_obj=number_objectives) - params = {"unc_params": {"dist_metric": "l1", "acq_sense": "exploration"}} - enting = Enting(problem_config, params=params) + enting = Enting(problem_config, params=PARAMS) # fit tree ensemble enting.fit(rnd_sample, testfunc_evals) @@ -66,8 +72,7 @@ def test_nchoosek_constraint(min_count, max_count): rnd_sample = problem_config.get_rnd_sample_list(num_samples=50) testfunc_evals = eval_reals_only_testfunc(rnd_sample) - params = {"unc_params": {"dist_metric": "l1", "acq_sense": "penalty"}} - enting = Enting(problem_config, params=params) + enting = Enting(problem_config, params=PARAMS) # fit tree ensemble enting.fit(rnd_sample, testfunc_evals) @@ -88,3 +93,46 @@ def test_nchoosek_constraint(min_count, max_count): res_pyo = opt_pyo.solve(enting, model_core=model_pyo) assert min_count <= sum(x > 1e-6 for x in res_pyo.opt_point) <= max_count + + +def test_constraint_list(): + problem_config = ProblemConfig(rnd_seed=73) + build_reals_only_problem(problem_config) + rnd_sample = problem_config.get_rnd_sample_list(num_samples=50) + testfunc_evals = eval_reals_only_testfunc(rnd_sample) + + enting = Enting(problem_config, params=PARAMS) + # fit tree ensemble + enting.fit(rnd_sample, testfunc_evals) + + model_pyo = problem_config.get_pyomo_model_core() + + # define the constraints + constraints = [ + NChooseKConstraint( + feature_keys=["x1", "x2", "x3", "x4", "x5"], + min_count=1, + max_count=3, + none_also_valid=True + ), + LinearInequalityConstraint( + feature_keys=["x3", "x4", "x5"], + coefficients=[1, 1, 1], + rhs=10.0 + ) + ] + + # apply constraints to the model + model_pyo.problem_constraints = pyo.ConstraintList() + 
ConstraintList(constraints).apply_pyomo_constraints( + model_pyo, problem_config.feat_list, model_pyo.problem_constraints + ) + + # optimise the model + params_pyomo = {"solver_name": "gurobi"} + opt_pyo = PyomoOptimizer(problem_config, params=params_pyomo) + res_pyo = opt_pyo.solve(enting, model_core=model_pyo) + + print(res_pyo.opt_point) + assert 1 <= sum(x > 1e-6 for x in res_pyo.opt_point) <= 3 + assert sum(res_pyo.opt_point[2:]) < 10.0 \ No newline at end of file From 166a32d0655aa79e42554b611e91036b9d2052f0 Mon Sep 17 00:00:00 2001 From: TobyBoyne Date: Fri, 20 Oct 2023 14:04:58 +0100 Subject: [PATCH 17/17] Convert NChooseK to an ExpressionConstraint --- entmoot/constraints.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/entmoot/constraints.py b/entmoot/constraints.py index d437749..c4f990a 100644 --- a/entmoot/constraints.py +++ b/entmoot/constraints.py @@ -64,14 +64,10 @@ def apply_pyomo_constraints( for constraint in self._constraints: features = constraint._get_feature_vars(model, feat_list) - if isinstance(constraint, ExpressionConstraint): - expr = constraint._get_expr(features) - - elif isinstance(constraint, FunctionalConstraint): - # must convert rules to expr - rule = constraint._get_function(model, features) - expr = rule(model, 0) + if not isinstance(constraint, ExpressionConstraint): + raise TypeError("Only ExpressionConstraints are supported in a constraint list") + expr = constraint._get_expr(model, features) pyo_constraint_list.add(expr) @@ -85,10 +81,10 @@ def as_pyomo_constraint( self, model: pyo.ConcreteModel, feat_list: list["FeatureType"] ) -> pyo.Constraint: features = self._get_feature_vars(model, feat_list) - return pyo.Constraint(expr=self._get_expr(features)) + return pyo.Constraint(expr=self._get_expr(model, features)) @abstractmethod - def _get_expr(self, features) -> pyo.Expression: + def _get_expr(self, model, features) -> pyo.Expression: pass @@ -126,16 +122,16 @@ def 
_get_lhs(self, features: pyo.ConcreteModel) -> pyo.Expression: class LinearEqualityConstraint(LinearConstraint): - def _get_expr(self, features): + def _get_expr(self, model, features): return self._get_lhs(features) == self.rhs class LinearInequalityConstraint(LinearConstraint): - def _get_expr(self, features): + def _get_expr(self, model, features): return self._get_lhs(features) <= self.rhs -class NChooseKConstraint(FunctionalConstraint): +class NChooseKConstraint(ExpressionConstraint): """Constrain the number of active features to be bounded by min_count and max_count.""" tol: float = 1e-6 @@ -153,7 +149,7 @@ def __init__( self.none_also_valid = none_also_valid super().__init__(feature_keys) - def _get_function(self, model, features): + def _get_expr(self, model, features): # constrain the features using the binary variable y # where y indicates whether the feature is selected # y * tol <= x <= y * M @@ -168,7 +164,4 @@ def _get_function(self, model, features): model.ub_selected.add(expr=model.feat_selected[i] * self.M >= features[i]) model.lb_selected.add(expr=model.feat_selected[i] * self.tol <= features[i]) - def inner(model, i): - return sum(model.feat_selected.values()) <= self.max_count - - return inner + return sum(model.feat_selected.values()) <= self.max_count