diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 52e7775a..77e7b00f 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-07T00:48:26","documenter_version":"1.7.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2025-01-17T00:26:30","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/dev/api/index.html b/dev/api/index.html index b11f3b3c..f13a127b 100644 --- a/dev/api/index.html +++ b/dev/api/index.html @@ -1,5 +1,5 @@ -API Reference · MathOptAI.jl

+API Reference · MathOptAI.jl

API Reference

This page lists the public API of MathOptAI.

Info

This page is an unstructured list of the MathOptAI API. For a more structured overview, read the Manual or Tutorial parts of this documentation.

Load all of the public API into the current scope with:

using MathOptAI

Alternatively, load only the module with:

import MathOptAI

and then prefix all calls with MathOptAI. to create MathOptAI.<NAME>.
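For example, a minimal sketch of module-qualified usage (the matrix and vector values here are illustrative, not from the original docstrings):

julia> import MathOptAI

julia> predictor = MathOptAI.Affine([2.0 3.0], [0.0])
Affine(A, b) [input: 2, output: 1]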

AbstractPredictor

add_predictor

MathOptAI.add_predictorFunction
add_predictor(
     model::JuMP.AbstractModel,
     predictor::AbstractPredictor,
     x::Vector,
@@ -23,7 +23,7 @@
 ├ variables [1]
 │ └ moai_Affine[1]
 └ constraints [1]
-  └ 2 x[1] + 3 x[2] - moai_Affine[1] = 0
source

build_predictor

Affine

MathOptAI.AffineType
Affine(
     A::Matrix{T},
     b::Vector{T} = zeros(T, size(A, 1)),
 ) where {T} <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = A x + b\]

Example

julia> using JuMP, MathOptAI
@@ -60,7 +60,7 @@
 julia> formulation
 ReducedSpace(Affine(A, b) [input: 2, output: 1])
 ├ variables [0]
-└ constraints [0]
source

BinaryDecisionTree

MathOptAI.BinaryDecisionTreeType
BinaryDecisionTree{K,V}(
     feat_id::Int,
     feat_value::K,
     lhs::Union{V,BinaryDecisionTree{K,V}},
@@ -99,7 +99,7 @@
   ├ moai_BinaryDecisionTree_z[2] --> {x[1] ≤ 1}
   ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 0}
   ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 1}
-  └ moai_BinaryDecisionTree_z[1] - moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0
+  └ moai_BinaryDecisionTree_z[1] - moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0
source
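As a construction sketch (the split values are illustrative), the tree f(x) = x[1] <= 1.0 ? -1 : 1 can be written as:

julia> using MathOptAI

julia> predictor = MathOptAI.BinaryDecisionTree{Float64,Int}(1, 1.0, -1, 1)
BinaryDecisionTree{Float64,Int64} [leaves=2, depth=1]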

GrayBox

MathOptAI.GrayBoxType
GrayBox(
     output_size::Function,
     callback::Function;
     has_hessian::Bool = false,
@@ -141,7 +141,7 @@
 julia> formulation
 ReducedSpace(GrayBox)
 ├ variables [0]
-└ constraints [0]
+└ constraints [0]
source
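A minimal construction sketch for GrayBox (the callback below, and its value/jacobian named-tuple return, are written from the signature above; treat the exact field names as an assumption):

julia> using MathOptAI

julia> f = MathOptAI.GrayBox(
           x -> 2,  # output_size(x): this predictor returns two outputs
           x -> (value = x .^ 2, jacobian = [2.0*x[1] 0.0; 0.0 2.0*x[2]]),
       );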

Pipeline

MathOptAI.PipelineType
Pipeline(layers::Vector{AbstractPredictor}) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = (l_1 \circ \ldots \circ l_N)(x)\]

where each $l_i$ is another AbstractPredictor.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -175,11 +175,11 @@
   ├ moai_ReLU[1] ≥ 0
   ├ moai_z[1] ≥ 0
   ├ moai_Affine[1] - moai_ReLU[1] + moai_z[1] = 0
-  └ moai_ReLU[1]*moai_z[1] = 0
+  └ moai_ReLU[1]*moai_z[1] = 0
source

PytorchModel

MathOptAI.PytorchModelType
PytorchModel(filename::String)

A wrapper struct for loading a PyTorch model.

The only supported file extension is .pt, where the .pt file has been created using torch.save(model, filename).

Warning

To use PytorchModel, your code must load the PythonCall package:

import PythonCall

Example

julia> using MathOptAI
 
 julia> using PythonCall  #  This line is important!
 
-julia> predictor = PytorchModel("model.pt");
+julia> predictor = PytorchModel("model.pt");
source

Quantile

MathOptAI.QuantileType
Quantile{D}(distribution::D, quantiles::Vector{Float64}) where {D}

An AbstractPredictor that represents the quantiles of distribution.

Example

julia> using JuMP, Distributions, MathOptAI
 
 julia> model = Model();
 
@@ -204,7 +204,7 @@
 │ └ moai_quantile[2]
 └ constraints [2]
   ├ moai_quantile[1] - op_quantile_0.1(x) = 0
-  └ moai_quantile[2] - op_quantile_0.9(x) = 0
+  └ moai_quantile[2] - op_quantile_0.9(x) = 0
source

ReducedSpace

MathOptAI.ReducedSpaceType
ReducedSpace(predictor::AbstractPredictor)

A wrapper type for other predictors that implement a reduced-space formulation.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -217,7 +217,7 @@
 julia> y
 2-element Vector{NonlinearExpr}:
  max(0.0, x[1])
- max(0.0, x[2])
+ max(0.0, x[2])
source

ReLU

MathOptAI.ReLUType
ReLU() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

as a non-smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -257,7 +257,7 @@
 julia> formulation
 ReducedSpace(ReLU())
 ├ variables [0]
-└ constraints [0]
+└ constraints [0]

source

ReLUBigM

MathOptAI.ReLUBigMType
ReLUBigM(M::Float64) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

via the big-M MIP reformulation:

\[\begin{aligned}
y \ge 0 \\
y \ge x \\
y \le M z \\
y \le x + M(1 - z) \\
z \in \{0, 1\}
\end{aligned}\]

@@ -298,7 +298,7 @@
 ├ moai_z[2] binary
 ├ -x[2] + moai_ReLU[2] ≥ 0
 ├ moai_ReLU[2] - 2 moai_z[2] ≤ 0
- └ -x[2] + moai_ReLU[2] + 3 moai_z[2] ≤ 3

+ └ -x[2] + moai_ReLU[2] + 3 moai_z[2] ≤ 3

source
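The constructor takes the big-M constant directly; as a sketch (the value 100.0 is illustrative and must dominate the true bounds on x):

julia> predictor = MathOptAI.ReLUBigM(100.0);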

ReLUQuadratic

MathOptAI.ReLUQuadraticType
ReLUQuadratic() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned}
x = y - z \\
y \cdot z = 0 \\
y, z \ge 0
\end{aligned}\]

@@ -337,7 +337,7 @@
 ├ x[1] - moai_ReLU[1] + moai_z[1] = 0
 ├ x[2] - moai_ReLU[2] + moai_z[2] = 0
 ├ moai_ReLU[1]*moai_z[1] = 0
- └ moai_ReLU[2]*moai_z[2] = 0

+ └ moai_ReLU[2]*moai_z[2] = 0

source

ReLUSOS1

MathOptAI.ReLUSOS1Type
ReLUSOS1() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \max\{0, x\}\]

by the reformulation:

\[\begin{aligned}
x = y - z \\
[y, z] \in SOS1 \\
y, z \ge 0
\end{aligned}\]

@@ -374,7 +374,7 @@
 ├ x[1] - moai_ReLU[1] + moai_z[1] = 0
 ├ x[2] - moai_ReLU[2] + moai_z[2] = 0
 ├ [moai_ReLU[1], moai_z[1]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])
- └ [moai_ReLU[2], moai_z[2]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])

+  └ [moai_ReLU[2], moai_z[2]] ∈ MathOptInterface.SOS1{Float64}([1.0, 2.0])
source

Scale

MathOptAI.ScaleType
Scale(
     scale::Vector{T},
     bias::Vector{T},
 ) where {T} <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = Diag(scale)x + bias\]

Example

julia> using JuMP, MathOptAI
@@ -417,7 +417,7 @@
 julia> formulation
 ReducedSpace(Scale(scale, bias))
 ├ variables [0]
-└ constraints [0]
+└ constraints [0]
source

Sigmoid

MathOptAI.SigmoidType
Sigmoid() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{1 + e^{-x}}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -457,7 +457,7 @@
 julia> formulation
 ReducedSpace(Sigmoid())
 ├ variables [0]
-└ constraints [0]
+└ constraints [0]
source

SoftMax

MathOptAI.SoftMaxType
SoftMax() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{e^{x}}{||e^{x}||_1}\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -503,7 +503,7 @@
 │ └ moai_SoftMax_denom
 └ constraints [2]
   ├ moai_SoftMax_denom ≥ 0
-  └ moai_SoftMax_denom - (0.0 + exp(x[2]) + exp(x[1])) = 0
+  └ moai_SoftMax_denom - (0.0 + exp(x[2]) + exp(x[1])) = 0
source

SoftPlus

MathOptAI.SoftPlusType
SoftPlus(; beta = 1.0) <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \frac{1}{\beta} \log(1 + e^{\beta x})\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -543,7 +543,7 @@
 julia> formulation
 ReducedSpace(SoftPlus(2.0))
 ├ variables [0]
-└ constraints [0]
+└ constraints [0]
source

Tanh

MathOptAI.TanhType
Tanh() <: AbstractPredictor

An AbstractPredictor that represents the relationship:

\[y = \tanh(x)\]

as a smooth nonlinear constraint.

Example

julia> using JuMP, MathOptAI
 
 julia> model = Model();
 
@@ -583,14 +583,14 @@
 julia> formulation
 ReducedSpace(Tanh())
 ├ variables [0]
-└ constraints [0]
source

AbstractFormulation

Formulation

MathOptAI.FormulationType
struct Formulation{P<:AbstractPredictor} <: AbstractFormulation
     predictor::P
     variables::Vector{Any}
     constraints::Vector{Any}
-end

+end

Fields

  • predictor: the predictor object used to build the formulation
  • variables: a vector of new decision variables added to the model
  • constraints: a vector of new constraints added to the model

Check the docstring of the predictor for an explanation of the formulation and the order of the elements in .variables and .constraints.

source
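As a sketch of inspecting these fields (assuming a JuMP model and variables x as in the examples above):

julia> y, formulation = MathOptAI.add_predictor(model, MathOptAI.ReLU(), x);

julia> formulation.predictor
MathOptAI.ReLU()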

PipelineFormulation

MathOptAI.PipelineFormulationType
struct PipelineFormulation{P<:AbstractPredictor} <: AbstractFormulation
     predictor::P
     layers::Vector{Any}
-end

+end

Fields

  • predictor: the predictor object used to build the formulation
  • layers: the formulation associated with each of the layers in the pipeline
source

Extensions

MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::MathOptAI.Quantile{<:AbstractGPs.PosteriorGP},
     x::Vector,
@@ -618,7 +618,7 @@
  moai_quantile[2]
 
 julia> @objective(model, Max, y[2] - y[1])
-moai_quantile[2] - moai_quantile[1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Union{DecisionTree.Root,DecisionTree.DecisionTreeClassifier},
     x::Vector,
@@ -647,7 +647,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_BinaryDecisionTree_value
+ moai_BinaryDecisionTree_value
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::DecisionTree.Root)

Convert a binary decision tree from DecisionTree.jl to a BinaryDecisionTree.

Example

julia> using MathOptAI, DecisionTree
 
 julia> truth(x::Vector) = x[1] <= 0.5 ? -2 : (x[2] <= 0.3 ? 3 : 4)
 truth (generic function with 1 method)
@@ -665,7 +665,7 @@
 Depth:  2
 
 julia> predictor = MathOptAI.build_predictor(tree)
-BinaryDecisionTree{Float64,Int64} [leaves=3, depth=2]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Flux.Chain,
     x::Vector;
@@ -689,7 +689,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::Flux.Chain;
     config::Dict = Dict{Any,Any}(),
     gray_box::Bool = false,
@@ -714,7 +714,7 @@
 Pipeline with layers:
  * Affine(A, b) [input: 1, output: 16]
  * ReLUQuadratic()
- * Affine(A, b) [input: 16, output: 1]
source
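A sketch of controlling how activations are reformulated via config (the chain here is illustrative):

julia> using Flux, MathOptAI

julia> chain = Flux.Chain(Flux.Dense(1 => 16, Flux.relu), Flux.Dense(16 => 1));

julia> MathOptAI.build_predictor(chain; config = Dict(Flux.relu => MathOptAI.ReLUQuadratic()))
Pipeline with layers:
 * Affine(A, b) [input: 1, output: 16]
 * ReLUQuadratic()
 * Affine(A, b) [input: 16, output: 1]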
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::GLM.GeneralizedLinearModel{
         GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
@@ -741,7 +741,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Sigmoid[1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::GLM.LinearModel,
     x::Vector;
@@ -760,7 +760,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::GLM.GeneralizedLinearModel{
         GLM.GlmResp{Vector{Float64},GLM.Bernoulli{Float64},GLM.LogitLink},
     };
@@ -774,14 +774,14 @@
 julia> predictor = MathOptAI.build_predictor(model)
 Pipeline with layers:
  * Affine(A, b) [input: 2, output: 1]
- * Sigmoid()
+ * Sigmoid()
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(predictor::GLM.LinearModel)

Convert a trained linear model from GLM.jl to an Affine layer.

Example

julia> using GLM, MathOptAI
 
 julia> X, Y = rand(10, 2), rand(10);
 
 julia> model = GLM.lm(X, Y);
 
 julia> predictor = MathOptAI.build_predictor(model)
-Affine(A, b) [input: 2, output: 1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple},
     x::Vector;
@@ -815,7 +815,7 @@
 
 julia> y
 1-element Vector{VariableRef}:
- moai_Affine[1]
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::Tuple{<:Lux.Chain,<:NamedTuple,<:NamedTuple};
     config::Dict = Dict{Any,Any}(),
 )

Convert a trained neural network from Lux.jl to a Pipeline.

Supported layers

  • Lux.Dense
  • Lux.Scale

Supported activation functions

  • Lux.relu
  • Lux.sigmoid
  • Lux.softplus
  • Lux.softmax
  • Lux.tanh

Keyword arguments

  • config: a dictionary that maps supported Lux activation functions to AbstractPredictors that control how the activation functions are reformulated. For example, Lux.sigmoid => MathOptAI.Sigmoid() or Lux.relu => MathOptAI.ReLUQuadratic().

Example

julia> using Lux, MathOptAI, Random
@@ -847,7 +847,7 @@
 Pipeline with layers:
  * Affine(A, b) [input: 1, output: 16]
  * ReLUQuadratic()
- * Affine(A, b) [input: 16, output: 1]
source
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::MathOptAI.PytorchModel,
     x::Vector;
@@ -856,13 +856,13 @@
     gray_box::Bool = false,
     gray_box_hessian::Bool = false,
     gray_box_device::String = "cpu",
-)

+)

Add a trained neural network from PyTorch via PythonCall.jl to model.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
MathOptAI.build_predictorMethod
MathOptAI.build_predictor(
     predictor::MathOptAI.PytorchModel;
     config::Dict = Dict{Any,Any}(),
     gray_box::Bool = false,
     gray_box_hessian::Bool = false,
     gray_box_device::String = "cpu",
-)

+)

Convert a trained neural network from PyTorch via PythonCall.jl to a Pipeline.

Supported layers

  • nn.Linear
  • nn.ReLU
  • nn.Sequential
  • nn.Sigmoid
  • nn.Softplus
  • nn.Tanh

Keyword arguments

  • config: a dictionary that maps Symbols to AbstractPredictors that control how the activation functions are reformulated. For example, :Sigmoid => MathOptAI.Sigmoid() or :ReLU => MathOptAI.ReLUQuadratic(). The supported Symbols are :ReLU, :Sigmoid, :SoftPlus, and :Tanh.
  • gray_box: if true, the neural network is added as a user-defined nonlinear operator, with gradients provided by torch.func.jacrev.
  • gray_box_hessian: if true, the gray box additionally computes the Hessian of the output using torch.func.hessian.
  • gray_box_device: device used to construct PyTorch tensors, e.g. "cuda" to run on an Nvidia GPU.
source
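A sketch of overriding an activation's reformulation (this assumes import PythonCall and an existing model.pt; both the filename and the big-M value are illustrative):

julia> using MathOptAI, PythonCall

julia> predictor = MathOptAI.build_predictor(
           MathOptAI.PytorchModel("model.pt");
           config = Dict(:ReLU => MathOptAI.ReLUBigM(100.0)),
       );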
MathOptAI.add_predictorMethod
MathOptAI.add_predictor(
     model::JuMP.AbstractModel,
     predictor::StatsModels.TableRegressionModel,
     x::DataFrames.DataFrame;
@@ -891,4 +891,4 @@
  moai_Affine[1]
  moai_Affine[1]
  moai_Affine[1]
- moai_Affine[1]
source
+ moai_Affine[1]source diff --git a/dev/assets/documenter.js b/dev/assets/documenter.js index 82252a11..7d68cd80 100644 --- a/dev/assets/documenter.js +++ b/dev/assets/documenter.js @@ -612,176 +612,194 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) { }; } -// `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! -const filters = [ - ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), -]; -const worker_str = - "(" + - worker_function.toString() + - ")(" + - JSON.stringify(documenterSearchIndex["docs"]) + - "," + - JSON.stringify(documenterBaseURL) + - "," + - JSON.stringify(filters) + - ")"; -const worker_blob = new Blob([worker_str], { type: "text/javascript" }); -const worker = new Worker(URL.createObjectURL(worker_blob)); - /////// SEARCH MAIN /////// -// Whether the worker is currently handling a search. This is a boolean -// as the worker only ever handles 1 or 0 searches at a time. -var worker_is_running = false; - -// The last search text that was sent to the worker. This is used to determine -// if the worker should be launched again when it reports back results. -var last_search_text = ""; - -// The results of the last search. This, in combination with the state of the filters -// in the DOM, is used compute the results to display on calls to update_search. -var unfiltered_results = []; - -// Which filter is currently selected -var selected_filter = ""; - -$(document).on("input", ".documenter-search-input", function (event) { - if (!worker_is_running) { - launch_search(); - } -}); - -function launch_search() { - worker_is_running = true; - last_search_text = $(".documenter-search-input").val(); - worker.postMessage(last_search_text); -} - -worker.onmessage = function (e) { - if (last_search_text !== $(".documenter-search-input").val()) { - launch_search(); - } else { - worker_is_running = false; - } - - unfiltered_results = e.data; - update_search(); -}; +function runSearchMainCode() { + // `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! + const filters = [ + ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), + ]; + const worker_str = + "(" + + worker_function.toString() + + ")(" + + JSON.stringify(documenterSearchIndex["docs"]) + + "," + + JSON.stringify(documenterBaseURL) + + "," + + JSON.stringify(filters) + + ")"; + const worker_blob = new Blob([worker_str], { type: "text/javascript" }); + const worker = new Worker(URL.createObjectURL(worker_blob)); + + // Whether the worker is currently handling a search. This is a boolean + // as the worker only ever handles 1 or 0 searches at a time. + var worker_is_running = false; + + // The last search text that was sent to the worker. This is used to determine + // if the worker should be launched again when it reports back results. + var last_search_text = ""; + + // The results of the last search. This, in combination with the state of the filters + // in the DOM, is used compute the results to display on calls to update_search. 
+ var unfiltered_results = []; + + // Which filter is currently selected + var selected_filter = ""; + + $(document).on("input", ".documenter-search-input", function (event) { + if (!worker_is_running) { + launch_search(); + } + }); -$(document).on("click", ".search-filter", function () { - if ($(this).hasClass("search-filter-selected")) { - selected_filter = ""; - } else { - selected_filter = $(this).text().toLowerCase(); + function launch_search() { + worker_is_running = true; + last_search_text = $(".documenter-search-input").val(); + worker.postMessage(last_search_text); } - // This updates search results and toggles classes for UI: - update_search(); -}); + worker.onmessage = function (e) { + if (last_search_text !== $(".documenter-search-input").val()) { + launch_search(); + } else { + worker_is_running = false; + } -/** - * Make/Update the search component - */ -function update_search() { - let querystring = $(".documenter-search-input").val(); + unfiltered_results = e.data; + update_search(); + }; - if (querystring.trim()) { - if (selected_filter == "") { - results = unfiltered_results; + $(document).on("click", ".search-filter", function () { + if ($(this).hasClass("search-filter-selected")) { + selected_filter = ""; } else { - results = unfiltered_results.filter((result) => { - return selected_filter == result.category.toLowerCase(); - }); + selected_filter = $(this).text().toLowerCase(); } - let search_result_container = ``; - let modal_filters = make_modal_body_filters(); - let search_divider = `
`; + // This updates search results and toggles classes for UI: + update_search(); + }); - if (results.length) { - let links = []; - let count = 0; - let search_results = ""; - - for (var i = 0, n = results.length; i < n && count < 200; ++i) { - let result = results[i]; - if (result.location && !links.includes(result.location)) { - search_results += result.div; - count++; - links.push(result.location); - } - } + /** + * Make/Update the search component + */ + function update_search() { + let querystring = $(".documenter-search-input").val(); - if (count == 1) { - count_str = "1 result"; - } else if (count == 200) { - count_str = "200+ results"; + if (querystring.trim()) { + if (selected_filter == "") { + results = unfiltered_results; } else { - count_str = count + " results"; + results = unfiltered_results.filter((result) => { + return selected_filter == result.category.toLowerCase(); + }); } - let result_count = `
${count_str}
`; - search_result_container = ` + let search_result_container = ``; + let modal_filters = make_modal_body_filters(); + let search_divider = `
`; + + if (results.length) { + let links = []; + let count = 0; + let search_results = ""; + + for (var i = 0, n = results.length; i < n && count < 200; ++i) { + let result = results[i]; + if (result.location && !links.includes(result.location)) { + search_results += result.div; + count++; + links.push(result.location); + } + } + + if (count == 1) { + count_str = "1 result"; + } else if (count == 200) { + count_str = "200+ results"; + } else { + count_str = count + " results"; + } + let result_count = `
${count_str}
`; + + search_result_container = ` +
+ ${modal_filters} + ${search_divider} + ${result_count} +
+ ${search_results} +
+
+ `; + } else { + search_result_container = `
${modal_filters} ${search_divider} - ${result_count} -
- ${search_results} -
-
+
0 result(s)
+ +
No result found!
`; - } else { - search_result_container = ` -
- ${modal_filters} - ${search_divider} -
0 result(s)
-
-
No result found!
- `; - } + } - if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").removeClass("is-justify-content-center"); - } + if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").removeClass("is-justify-content-center"); + } - $(".search-modal-card-body").html(search_result_container); - } else { - if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").addClass("is-justify-content-center"); + $(".search-modal-card-body").html(search_result_container); + } else { + if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").addClass("is-justify-content-center"); + } + + $(".search-modal-card-body").html(` +
Type something to get started!
+ `); } + } - $(".search-modal-card-body").html(` -
Type something to get started!
- `); + /** + * Make the modal filter html + * + * @returns string + */ + function make_modal_body_filters() { + let str = filters + .map((val) => { + if (selected_filter == val.toLowerCase()) { + return `${val}`; + } else { + return `${val}`; + } + }) + .join(""); + + return ` +
+ Filters: + ${str} +
`; } } -/** - * Make the modal filter html - * - * @returns string - */ -function make_modal_body_filters() { - let str = filters - .map((val) => { - if (selected_filter == val.toLowerCase()) { - return `${val}`; - } else { - return `${val}`; - } - }) - .join(""); - - return ` -
- Filters: - ${str} -
`; +function waitUntilSearchIndexAvailable() { + // It is possible that the documenter.js script runs before the page + // has finished loading and documenterSearchIndex gets defined. + // So we need to wait until the search index actually loads before setting + // up all the search-related stuff. + if (typeof documenterSearchIndex !== "undefined") { + runSearchMainCode(); + } else { + console.warn("Search Index not available, waiting"); + setTimeout(waitUntilSearchIndexAvailable, 1000); + } } +// The actual entry point to the search code +waitUntilSearchIndexAvailable(); + }) //////////////////////////////////////////////////////////////////////////////// require(['jquery'], function($) { diff --git a/dev/developers/design_principles/index.html b/dev/developers/design_principles/index.html index 87c5673c..1658d320 100644 --- a/dev/developers/design_principles/index.html +++ b/dev/developers/design_principles/index.html @@ -23,4 +23,4 @@ model_reduced_space = Model() @variable(model_reduced_space, x[1:2]) @variable(model_reduced_space, y[1:3]) -@constraint(model_reduced_space, y .== max.(0, layer.A * x + layer.b))

+@constraint(model_reduced_space, y .== max.(0, layer.A * x + layer.b))

In general, the full-space formulations have more variables and constraints but simpler nonlinear expressions, whereas the reduced-space formulations have fewer variables and constraints but more complicated nonlinear expressions.

MathOptAI.jl implements the full-space formulation by default, but some layers support the reduced-space formulation with the ReducedSpace wrapper.
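As a sketch (reusing model_reduced_space and x from above; the ReLU layer is illustrative), opting in to a reduced-space formulation looks like:

y, formulation = MathOptAI.add_predictor(
    model_reduced_space,
    MathOptAI.ReducedSpace(MathOptAI.ReLU()),
    x,
)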

diff --git a/dev/index.html b/dev/index.html index 09f7094f..dc7eb50d 100644 --- a/dev/index.html +++ b/dev/index.html @@ -27,4 +27,4 @@ moai_SoftMax[7] moai_SoftMax[8] moai_SoftMax[9] - moai_SoftMax[10]

+ moai_SoftMax[10]

Getting help

This package is under active development. For help, questions, comments, and suggestions, please open a GitHub issue.

Inspiration

This project is mainly inspired by two existing projects:

Other works, from which we took less inspiration, include:

The 2024 paper of López-Flores et al. is an excellent summary of the state of the field at the time that we started development of MathOptAI.

López-Flores, F.J., Ramírez-Márquez, C., Ponce-Ortega J.M. (2024). Process Systems Engineering Tools for Optimization of Trained Machine Learning Models: Comparative and Perspective. Industrial & Engineering Chemistry Research, 63(32), 13966-13979. DOI: 10.1021/acs.iecr.4c00632

diff --git a/dev/manual/AbstractGPs/index.html b/dev/manual/AbstractGPs/index.html index 96a36916..1d8bd39b 100644 --- a/dev/manual/AbstractGPs/index.html +++ b/dev/manual/AbstractGPs/index.html @@ -3,4 +3,4 @@ moai_quantile[1] moai_quantile[2]
julia> formulationQuantile(_, [0.1, 0.9]) ├ variables [0] -└ constraints [0]
+└ constraints [0]
julia> @objective(model, Max, y[2] - y[1])moai_quantile[2] - moai_quantile[1] diff --git a/dev/manual/DecisionTree/index.html b/dev/manual/DecisionTree/index.html index 10c28d90..b9e6b37a 100644 --- a/dev/manual/DecisionTree/index.html +++ b/dev/manual/DecisionTree/index.html @@ -15,4 +15,4 @@ ├ moai_BinaryDecisionTree_z[2] --> {x[2] ≤ 0.41966499895337794} ├ moai_BinaryDecisionTree_z[3] --> {x[1] ≥ 0.4743457016210958} ├ moai_BinaryDecisionTree_z[3] --> {x[2] ≥ 0.41966499895337794} - └ 2 moai_BinaryDecisionTree_z[1] - 3 moai_BinaryDecisionTree_z[2] - 4 moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0 + └ 2 moai_BinaryDecisionTree_z[1] - 3 moai_BinaryDecisionTree_z[2] - 4 moai_BinaryDecisionTree_z[3] + moai_BinaryDecisionTree_value = 0 diff --git a/dev/manual/Flux/index.html b/dev/manual/Flux/index.html index 29b9a2d0..2a3a187d 100644 --- a/dev/manual/Flux/index.html +++ b/dev/manual/Flux/index.html @@ -5,8 +5,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ -0.03577340021729469 x[1] - moai_Affine[1] = 0 - └ 0.13506142795085907 x[1] - moai_Affine[2] = 0 + ├ 0.7435214519500732 x[1] - moai_Affine[1] = 0 + └ -1.2505210638046265 x[1] - moai_Affine[2] = 0 MathOptAI.ReLU() ├ variables [2] │ ├ moai_ReLU[1] @@ -21,9 +21,9 @@ │ └ moai_Affine[1] └ constraints [2] ├ moai_Affine[1] ≤ 0 - └ -0.8536216616630554 moai_ReLU[1] - 0.09841794520616531 moai_ReLU[2] - moai_Affine[1] = 0

+ └ -1.1201879978179932 moai_ReLU[1] - 0.5682011842727661 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Flux, MathOptAI
julia> predictor = Flux.Chain(Flux.Dense(1 => 2, Flux.relu), Flux.Dense(2 => 1));
julia> model = Model();
julia> @variable(model, x[1:1]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x; reduced_space = true);
julia> y1-element Vector{JuMP.NonlinearExpr}: - ((+(0.0) + (-0.10686518996953964 * max(0.0, 0.8712630271911621 x[1]))) + (-1.3386929035186768 * max(0.0, 0.581385612487793 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) + ((+(0.0) + (0.7965260148048401 * max(0.0, 1.2558213472366333 x[1]))) + (1.025068998336792 * max(0.0, -0.09778439253568649 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) ├ variables [0] └ constraints [0] ReducedSpace(MathOptAI.ReLU()) @@ -48,8 +48,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 0.6873579621315002 x[1] - moai_Affine[1] = 0 - └ 0.30008774995803833 x[1] - moai_Affine[2] = 0 + ├ -0.9636475443840027 x[1] - moai_Affine[1] = 0 + └ -0.224209263920784 x[1] - moai_Affine[2] = 0 MathOptAI.ReLUSOS1() ├ variables [4] │ ├ moai_ReLU[1] @@ -66,6 +66,5 @@ Affine(A, b) [input: 2, output: 1] ├ variables [1] │ └ moai_Affine[1] -└ constraints [2] - ├ moai_Affine[1] ≥ 0 - └ 1.2513288259506226 moai_ReLU[1] + 1.113552212715149 moai_ReLU[2] - moai_Affine[1] = 0
+└ constraints [1] + └ 1.399377703666687 moai_ReLU[1] - 0.2851664125919342 moai_ReLU[2] - moai_Affine[1] = 0 diff --git a/dev/manual/GLM/index.html b/dev/manual/GLM/index.html index 6f62af24..22c6e6e2 100644 --- a/dev/manual/GLM/index.html +++ b/dev/manual/GLM/index.html @@ -4,12 +4,12 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ 0.7748493086533497 x[1] + 0.49679732292081996 x[2] - moai_Affine[1] = 0

Logistic regression

The input x to add_predictor must be a vector with the same number of elements as columns in the training matrix. The return is a vector of JuMP variables with a single element.

julia> using GLM, JuMP, MathOptAI
julia> X, Y = rand(10, 2), rand(Bool, 10);
julia> predictor = GLM.glm(X, Y, GLM.Bernoulli());
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x);
julia> y1-element Vector{JuMP.VariableRef}: + └ 0.5943052098775468 x[1] + 0.3710399064355762 x[2] - moai_Affine[1] = 0

Logistic regression

The input x to add_predictor must be a vector with the same number of elements as columns in the training matrix. The return is a vector of JuMP variables with a single element.

julia> using GLM, JuMP, MathOptAI
julia> X, Y = rand(10, 2), rand(Bool, 10);
julia> predictor = GLM.glm(X, Y, GLM.Bernoulli());
julia> model = Model();
julia> @variable(model, x[1:2]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x);
julia> y1-element Vector{JuMP.VariableRef}: moai_Sigmoid[1]
julia> formulationAffine(A, b) [input: 2, output: 1] ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ -1.450469893146627 x[1] + 1.2424920083768944 x[2] - moai_Affine[1] = 0 + └ -0.38816394476620386 x[1] + 0.13876572350123043 x[2] - moai_Affine[1] = 0 MathOptAI.Sigmoid() ├ variables [1] │ └ moai_Sigmoid[1] @@ -25,4 +25,4 @@ moai_Affine[1] moai_Affine[1] moai_Affine[1] - moai_Affine[1]
+ moai_Affine[1] diff --git a/dev/manual/Lux/index.html b/dev/manual/Lux/index.html index a75503e0..16e55a2f 100644 --- a/dev/manual/Lux/index.html +++ b/dev/manual/Lux/index.html @@ -9,8 +9,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 1.2049298286437988 x[1] - moai_Affine[1] = 0 - └ 0.816858172416687 x[1] - moai_Affine[2] = 0 + ├ 0.0008783403318375349 x[1] - moai_Affine[1] = 0 + └ -0.2703956365585327 x[1] - moai_Affine[2] = 0 MathOptAI.ReLU() ├ variables [2] │ ├ moai_ReLU[1] @@ -24,14 +24,14 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [2] - ├ moai_Affine[1] ≤ 0 - └ -0.4297545552253723 moai_ReLU[1] - 0.579918384552002 moai_ReLU[2] - moai_Affine[1] = 0

+ ├ moai_Affine[1] ≥ 0
+ └ 0.9404870271682739 moai_ReLU[1] + 0.8026401996612549 moai_ReLU[2] - moai_Affine[1] = 0

Reduced-space

Use the reduced_space = true keyword to formulate a reduced-space model:

julia> using JuMP, Lux, MathOptAI, Random
julia> rng = Random.MersenneTwister();
julia> chain = Lux.Chain(Lux.Dense(1 => 2, Lux.relu), Lux.Dense(2 => 1))Chain( layer_1 = Dense(1 => 2, relu), # 4 parameters layer_2 = Dense(2 => 1), # 3 parameters ) # Total: 7 parameters, # plus 0 states.
julia> parameters, state = Lux.setup(rng, chain);
julia> predictor = (chain, parameters, state);
julia> model = Model();
julia> @variable(model, x[1:1]);
julia> y, formulation = MathOptAI.add_predictor(model, predictor, x; reduced_space = true);
julia> y1-element Vector{JuMP.NonlinearExpr}: - ((+(0.0) + (0.2827740013599396 * max(0.0, -0.8161349296569824 x[1]))) + (-0.03431731089949608 * max(0.0, 0.2235037237405777 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) + ((+(0.0) + (-0.13756495714187622 * max(0.0, 0.21255970001220703 x[1]))) + (-0.9397847056388855 * max(0.0, 1.045299768447876 x[1]))) + 0.0
julia> formulationReducedSpace(Affine(A, b) [input: 1, output: 2]) ├ variables [0] └ constraints [0] ReducedSpace(MathOptAI.ReLU()) @@ -54,8 +54,8 @@ │ ├ moai_Affine[1] │ └ moai_Affine[2] └ constraints [2] - ├ 1.1088364124298096 x[1] - moai_Affine[1] = 0 - └ 1.0128564834594727 x[1] - moai_Affine[2] = 0 + ├ 0.45079728960990906 x[1] - moai_Affine[1] = 0 + └ 0.8464735746383667 x[1] - moai_Affine[2] = 0 MathOptAI.ReLUSOS1() ├ variables [4] │ ├ moai_ReLU[1] @@ -72,6 +72,5 @@ Affine(A, b) [input: 2, output: 1] ├ variables [1] │ └ moai_Affine[1] -└ constraints [2] - ├ moai_Affine[1] ≤ 0 - └ -0.2850050628185272 moai_ReLU[1] - 1.3850866556167603 moai_ReLU[2] - moai_Affine[1] = 0
+└ constraints [1] + └ 0.4085981845855713 moai_ReLU[1] - 1.0271365642547607 moai_ReLU[2] - moai_Affine[1] = 0 diff --git a/dev/manual/PyTorch/index.html b/dev/manual/PyTorch/index.html index faba8bf9..d16054a9 100644 --- a/dev/manual/PyTorch/index.html +++ b/dev/manual/PyTorch/index.html @@ -77,4 +77,4 @@ ├ variables [1] │ └ moai_Affine[1] └ constraints [1] - └ -0.4644489884376526 moai_ReLU[1] + 0.5382549166679382 moai_ReLU[2] - moai_Affine[1] = 0.5698285102844238 + └ -0.4644489884376526 moai_ReLU[1] + 0.5382549166679382 moai_ReLU[2] - moai_Affine[1] = 0.5698285102844238 diff --git a/dev/manual/predictors/index.html b/dev/manual/predictors/index.html index dc585d81..12b2f5c4 100644 --- a/dev/manual/predictors/index.html +++ b/dev/manual/predictors/index.html @@ -1,2 +1,2 @@ -Predictors · MathOptAI.jl

+Predictors · MathOptAI.jl

Predictors

The main entry point for embedding prediction models into JuMP is add_predictor.

All methods use the form y, formulation = MathOptAI.add_predictor(model, predictor, x) to add the relationship y = predictor(x) to model.
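For example, a minimal sketch with an illustrative single-output affine predictor:

julia> using JuMP, MathOptAI

julia> model = Model();

julia> @variable(model, x[1:2]);

julia> y, formulation = MathOptAI.add_predictor(model, MathOptAI.Affine([2.0 3.0]), x);

julia> y
1-element Vector{VariableRef}:
 moai_Affine[1]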

Supported predictors

The following predictors are supported. See their docstrings for details:

PredictorRelationshipDimensions
Affine$f(x) = Ax + b$$M \rightarrow N$
BinaryDecisionTreeA binary decision tree$M \rightarrow 1$
GrayBox$f(x)$$M \rightarrow N$
Pipeline$f(x) = (l_1 \circ \ldots \circ l_N)(x)$$M \rightarrow N$
QuantileThe quantiles of a distribution$M \rightarrow N$
ReLU$f(x) = \max.(0, x)$$M \rightarrow M$
ReLUBigM$f(x) = \max.(0, x)$$M \rightarrow M$
ReLUQuadratic$f(x) = \max.(0, x)$$M \rightarrow M$
ReLUSOS1$f(x) = \max.(0, x)$$M \rightarrow M$
Scale$f(x) = scale .* x .+ bias$$M \rightarrow M$
Sigmoid$f(x) = \frac{1}{1 + e^{-x}}$$M \rightarrow M$
SoftMax$f(x) = \frac{e^x}{\sum e^{x_i}}$$M \rightarrow M$
SoftPlus$f(x) = \frac{1}{\beta} \log(1 + e^{\beta x})$$M \rightarrow M$
Tanh$f(x) = \tanh.(x)$$M \rightarrow M$

Note that some predictors, such as the ReLU ones, offer multiple formulations of the same mathematical relationship. The "right" choice is solver- and problem-dependent.

ReLU

There are a number of different mathematical formulations for the rectified linear unit (ReLU).

  • ReLU: requires the solver to support the max nonlinear operator.
  • ReLUBigM: requires the solver to support mixed-integer linear programs, and requires the user to have prior knowledge of a suitable value for the "big-M" parameter.
  • ReLUQuadratic: requires the solver to support quadratic equality constraints.
  • ReLUSOS1: requires the solver to support SOS1 constraints.

The correct choice for which ReLU formulation to use is problem- and solver-dependent.
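As a sketch, switching formulations is a one-line change at the call site (model, x, and the big-M value are illustrative):

julia> y, _ = MathOptAI.add_predictor(model, MathOptAI.ReLU(), x);

julia> y, _ = MathOptAI.add_predictor(model, MathOptAI.ReLUBigM(100.0), x);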

diff --git a/dev/tutorials/decision_trees/index.html b/dev/tutorials/decision_trees/index.html index 078fc00c..4bd5b13d 100644 --- a/dev/tutorials/decision_trees/index.html +++ b/dev/tutorials/decision_trees/index.html @@ -243,4 +243,4 @@ 5998 │ 5998 1165 2.81 x_merit[5998] moai_BinaryDecisionTree_valu ⋯ 5999 │ 5999 1400 3.43 x_merit[5999] moai_BinaryDecisionTree_valu 6000 │ 6000 1097 2.65 x_merit[6000] moai_BinaryDecisionTree_valu - 3 columns and 5985 rows omitted

+ 3 columns and 5985 rows omitted

Solution analysis

We expect that just over 3,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)3530.0

We awarded merit scholarships to roughly 1 in 5 students:

julia> count(evaluate_df.merit_sol .> 1e-5)1278

The average merit scholarship was worth just under $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])938.6854460093895

This page was generated using Literate.jl.

diff --git a/dev/tutorials/gaussian/0b8fc589.svg b/dev/tutorials/gaussian/0b8fc589.svg new file mode 100644 index 00000000..52aef7d8 --- /dev/null +++ b/dev/tutorials/gaussian/0b8fc589.svg @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/15d1ac5f.svg b/dev/tutorials/gaussian/15d1ac5f.svg new file mode 100644 index 00000000..01f54795 --- /dev/null +++ b/dev/tutorials/gaussian/15d1ac5f.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/1e793cb0.svg b/dev/tutorials/gaussian/1e793cb0.svg deleted file mode 100644 index 48ea0228..00000000 --- a/dev/tutorials/gaussian/1e793cb0.svg +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/320bd1f5.svg b/dev/tutorials/gaussian/320bd1f5.svg deleted file mode 100644 index 6ede6d64..00000000 --- a/dev/tutorials/gaussian/320bd1f5.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/81a6e5df.svg b/dev/tutorials/gaussian/81a6e5df.svg deleted file mode 100644 index 7090dc04..00000000 --- a/dev/tutorials/gaussian/81a6e5df.svg +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dev/tutorials/gaussian/8e88e20c.svg b/dev/tutorials/gaussian/9f473ebe.svg similarity index 86% rename from dev/tutorials/gaussian/8e88e20c.svg rename to dev/tutorials/gaussian/9f473ebe.svg index d29839b8..ccfc4d68 100644 --- a/dev/tutorials/gaussian/8e88e20c.svg +++ b/dev/tutorials/gaussian/9f473ebe.svg @@ -1,50 +1,50 @@ - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/c6233b11.svg b/dev/tutorials/gaussian/c6233b11.svg new file mode 100644 index 00000000..32c64e33 --- /dev/null +++ b/dev/tutorials/gaussian/c6233b11.svg @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/gaussian/index.html b/dev/tutorials/gaussian/index.html index d53a4205..47dfdac2 100644 --- a/dev/tutorials/gaussian/index.html +++ b/dev/tutorials/gaussian/index.html @@ -6,13 +6,13 @@ import MathOptAI import Plots

Prediction model

Assume that we have some true underlying univariate function:

x_domain = 0:0.01:2π
 true_function(x) = sin(x)
-Plots.plot(x_domain, true_function.(x_domain); label = "truth")
+Plots.plot(x_domain, true_function.(x_domain); label = "truth")
Example block output

We don't know the function, but we have access to a limited set of noisy sample points:

N = 20
 x_data = rand(x_domain, N)
 noisy_sampler(x) = true_function(x) + 0.25 * (2rand() - 1)
 y_data = noisy_sampler.(x_data)
-Plots.scatter!(x_data, y_data; label = "data")
+Plots.scatter!(x_data, y_data; label = "data")
Example block output

Using the data, we want to build a predictor y = predictor(x). One choice is a Gaussian Process:

fx = AbstractGPs.GP(AbstractGPs.Matern32Kernel())(x_data, 0.4)
 p_fx = AbstractGPs.posterior(fx, y_data)
-Plots.plot!(x_domain, p_fx; label = "GP", fillalpha = 0.1)
+Plots.plot!(x_domain, p_fx; label = "GP", fillalpha = 0.1)
Example block output

Gaussian Processes fit a mean and variance function:

AbstractGPs.mean_and_var(p_fx, [π / 2])
([0.2959950125626034], [0.6659054323490708])

Decision model

Our goal for this JuMP model is to embed the Gaussian Process from AbstractGPs into the model and then solve for different fixed values of x to recreate the function that the Gaussian Process has learned to approximate.

First, create a JuMP model:

model = Model(Ipopt.Optimizer)
 set_silent(model)
 @variable(model, x)

\[ x \]

Since a Gaussian Process is an infinite-dimensional object (its prediction is a distribution), we need some way of converting the Gaussian Process into a finite set of scalar values. For this, we use the Quantile predictor:

predictor = MathOptAI.Quantile(p_fx, [0.25, 0.75]);
 y, _ = MathOptAI.add_predictor(model, predictor, [x])
(JuMP.VariableRef[moai_quantile[1], moai_quantile[2]], Quantile(_, [0.25, 0.75])
@@ -28,4 +28,4 @@
         push!(Y, [NaN, NaN])
     end
 end
-Plots.plot!(X, reduce(hcat, Y)'; label = ["P25" "P75"], linewidth = 3)
+Plots.plot!(X, reduce(hcat, Y)'; label = ["P25" "P75"], linewidth = 3)Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/mnist/47760b11.svg b/dev/tutorials/mnist/2880cb7b.svg similarity index 88% rename from dev/tutorials/mnist/47760b11.svg rename to dev/tutorials/mnist/2880cb7b.svg index 76896574..3801b578 100644 --- a/dev/tutorials/mnist/47760b11.svg +++ b/dev/tutorials/mnist/2880cb7b.svg @@ -1,33 +1,33 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + diff --git a/dev/tutorials/mnist_lux/e6bb4110.svg b/dev/tutorials/mnist/b0b1a6ac.svg similarity index 67% rename from dev/tutorials/mnist_lux/e6bb4110.svg rename to dev/tutorials/mnist/b0b1a6ac.svg index a1cb93a7..fcbcc0d3 100644 --- a/dev/tutorials/mnist_lux/e6bb4110.svg +++ b/dev/tutorials/mnist/b0b1a6ac.svg @@ -1,450 +1,360 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +gLhlegAAcON98+Sl6Qlco7/t/WN6AgAhLvcAAAAQJ+4BAAAgTtwDAABAnLgHAACAOHEPAAAAceIe +AAAA4sQ9AAAAxIl7AAAAiBP3AAAAECfuAQAAIE7cAwAAQJy4BwAAgDhxDwAAAHHiHgAAAOLEPQAA +AMSJewAAAIgT9wAAABAn7gEAACBO3AMAAECcuAcAAIA4cQ8AAABx4h4AAADixD0AAADEiXsAAACI +E/cAAAAQJ+4BAAAgTtwDAABAnLgHAACAOHEPAAAAceIeAAAA4sQ9AAAAxIl7AAAAiBP3AAAAECfu +AQAAIE7cAwAAQNwyPQAAuPEe/vqz0xPe5/7pASv/euLH0xO2XLj8+vQEAEJc7gEAACBO3AMAAECc +uAcAAIA4cQ8AAABx4h4AAADixD0AAADEiXsAAACIE/cAAAAQJ+4BAAAgTtwDAABAnLgHAACAOHEP +AAAAceIeAAAA4sQ9AAAAxIl7AAAAiBP3AAAAECfuAQAAIE7cAwAAQJy4BwAAgDhxDwAAAHHiHgAA +AOLEPQAAAMSJewAAAIgT9wAAwH/arwMSAAAAAEH/X7cj0BcCc3IPAAAAc3IPAAAAc3IPAAAAc3IP +AAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAcwGQIm4xmeXmvgAA +AABJRU5ErkJggg== +" transform="translate(2551, 90)"/> - + - - + + - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + +ECfuAQAAIE7cAwAAQJy4BwAAgDhxDwAAAHHiHgAAAOLEPQAAAMSJewAAAIgT9wAAfLP9OiABAAAA +EPT/dTsCfSEAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAA +c3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IP +AAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAA +c3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAc3IPAAAAcwFUoeV2ihyJCQAAAABJRU5ErkJggg== +" transform="translate(151, 1290)"/> - + - - + + - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dev/tutorials/mnist/3f494451.svg b/dev/tutorials/mnist/de9b510b.svg similarity index 81% rename from dev/tutorials/mnist/3f494451.svg rename to dev/tutorials/mnist/de9b510b.svg index 4c17bb6e..36a109a2 100644 --- a/dev/tutorials/mnist/3f494451.svg +++ b/dev/tutorials/mnist/de9b510b.svg @@ -1,33 +1,33 @@ - + - + - + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + + - + - + - - - - - - - - - - - + + + + + + + + + + +

+Plots.plot([plot_image(train_data[i]) for i in 1:6]...; layout = (2, 3))
Example block output

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better-performing networks; try experimenting.)

predictor = Flux.Chain(
     Flux.Dense(28^2 => 32, Flux.sigmoid),
     Flux.Dense(32 => 10),
     Flux.softmax,
@@ -49,8 +49,8 @@
     println("Accuracy = $p %")
     return
 end
score_model (generic function with 1 method)
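The hunk above elides the start of score_model. As a rough guide, a function with this behavior could be written as follows; this is a hypothetical reconstruction that assumes the tutorial's data_loader helper and uses Flux.onecold to pick the predicted class, not necessarily the tutorial's exact code:

function score_model(predictor, data)
    # Load the whole dataset as a single batch of (features, one-hot labels).
    x, y = only(data_loader(data; batchsize = length(data)))
    # A prediction is correct if the argmax of the output matches the label.
    correct = Flux.onecold(predictor(x)) .== Flux.onecold(y)
    p = round(100 * sum(correct) / length(correct); digits = 2)
    println("Accuracy = $p %")
    return
end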

The accuracy of our model is only around 10% before training, roughly what random guessing over the ten digit classes would achieve:

score_model(predictor, train_data)
-score_model(predictor, test_data)
Accuracy = 9.86 %
-Accuracy = 9.57 %

+score_model(predictor, test_data)
Accuracy = 13.39 %
+Accuracy = 13.45 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Flux works; see the documentation at https://fluxml.ai for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
     train_loader = data_loader(train_data; batchsize = 256, shuffle = true)
     optimizer_state = Flux.setup(Flux.Adam(3e-4), predictor)
     for epoch in 1:30
@@ -66,47 +66,47 @@
         print("Epoch $epoch: loss = $loss\t")
         score_model(predictor, test_data)
     end
-end
Epoch 1: loss = 1.8005	Accuracy = 74.68 %
-Epoch 2: loss = 1.1646	Accuracy = 83.32 %
-Epoch 3: loss = 0.8594	Accuracy = 86.61 %
-Epoch 4: loss = 0.6817	Accuracy = 88.24 %
-Epoch 5: loss = 0.5702	Accuracy = 89.45 %
-Epoch 6: loss = 0.4955	Accuracy = 90.07 %
-Epoch 7: loss = 0.4441	Accuracy = 90.61 %
-Epoch 8: loss = 0.4053	Accuracy = 91.02 %
-Epoch 9: loss = 0.3758	Accuracy = 91.27 %
-Epoch 10: loss = 0.3523	Accuracy = 91.53 %
-Epoch 11: loss = 0.3325	Accuracy = 91.75 %
-Epoch 12: loss = 0.3162	Accuracy = 91.99 %
-Epoch 13: loss = 0.3019	Accuracy = 92.27 %
-Epoch 14: loss = 0.2899	Accuracy = 92.43 %
-Epoch 15: loss = 0.2788	Accuracy = 92.64 %
-Epoch 16: loss = 0.2693	Accuracy = 92.67 %
-Epoch 17: loss = 0.2603	Accuracy = 92.89 %
-Epoch 18: loss = 0.2525	Accuracy = 93.06 %
-Epoch 19: loss = 0.2458	Accuracy = 93.05 %
-Epoch 20: loss = 0.2382	Accuracy = 93.25 %
-Epoch 21: loss = 0.2319	Accuracy = 93.42 %
-Epoch 22: loss = 0.2261	Accuracy = 93.55 %
-Epoch 23: loss = 0.2206	Accuracy = 93.67 %
-Epoch 24: loss = 0.215	Accuracy = 93.89 %
-Epoch 25: loss = 0.2101	Accuracy = 93.9 %
-Epoch 26: loss = 0.2055	Accuracy = 94.08 %
-Epoch 27: loss = 0.2011	Accuracy = 94.17 %
-Epoch 28: loss = 0.1971	Accuracy = 94.24 %
-Epoch 29: loss = 0.1929	Accuracy = 94.3 %
-Epoch 30: loss = 0.1891	Accuracy = 94.38 %

+end
Epoch 1: loss = 1.797	Accuracy = 77.46 %
+Epoch 2: loss = 1.1648	Accuracy = 83.94 %
+Epoch 3: loss = 0.863	Accuracy = 86.37 %
+Epoch 4: loss = 0.6885	Accuracy = 87.93 %
+Epoch 5: loss = 0.5774	Accuracy = 89.04 %
+Epoch 6: loss = 0.5028	Accuracy = 89.79 %
+Epoch 7: loss = 0.4491	Accuracy = 90.43 %
+Epoch 8: loss = 0.4103	Accuracy = 90.73 %
+Epoch 9: loss = 0.38	Accuracy = 91.07 %
+Epoch 10: loss = 0.3564	Accuracy = 91.38 %
+Epoch 11: loss = 0.3373	Accuracy = 91.71 %
+Epoch 12: loss = 0.3206	Accuracy = 91.81 %
+Epoch 13: loss = 0.3067	Accuracy = 92.03 %
+Epoch 14: loss = 0.2947	Accuracy = 92.31 %
+Epoch 15: loss = 0.2842	Accuracy = 92.44 %
+Epoch 16: loss = 0.2744	Accuracy = 92.66 %
+Epoch 17: loss = 0.2652	Accuracy = 92.7 %
+Epoch 18: loss = 0.2576	Accuracy = 92.94 %
+Epoch 19: loss = 0.2503	Accuracy = 92.93 %
+Epoch 20: loss = 0.2435	Accuracy = 93.09 %
+Epoch 21: loss = 0.2372	Accuracy = 93.21 %
+Epoch 22: loss = 0.2319	Accuracy = 93.32 %
+Epoch 23: loss = 0.2261	Accuracy = 93.45 %
+Epoch 24: loss = 0.2208	Accuracy = 93.57 %
+Epoch 25: loss = 0.2158	Accuracy = 93.71 %
+Epoch 26: loss = 0.2116	Accuracy = 93.91 %
+Epoch 27: loss = 0.2071	Accuracy = 93.97 %
+Epoch 28: loss = 0.2029	Accuracy = 94.05 %
+Epoch 29: loss = 0.199	Accuracy = 94.12 %
+Epoch 30: loss = 0.1955	Accuracy = 94.28 %
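The hunks above also elide the inner update step of the training loop. A minimal sketch of a typical explicit-mode Flux update for this loop, assuming a cross-entropy loss over each mini-batch (a hypothetical reconstruction, not the tutorial's verbatim code):

for (x_batch, y_batch) in train_loader
    # Compute the loss and its gradients with respect to the model.
    loss, gradients = Flux.withgradient(predictor) do p
        Flux.crossentropy(p(x_batch), y_batch)
    end
    # Take one Adam step using the optimizer state created above.
    Flux.update!(optimizer_state, predictor, gradients[1])
end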

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
     score, index = findmax(predictor(vec(x)))
     title = "Predicted: $(index - 1) ($(round(Int, 100 * score))%)"
     return plot_image(x; title)
 end
 
 plots = [plot_image(predictor, test_data[i].features) for i in 1:8]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
 losses = Flux.crossentropy(predictor(x), y; agg = identity)
 indices = sortperm(losses; dims = 2)[[1:4; end-3:end]]
 plots = [plot_image(predictor, test_data[i].features) for i in indices]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or the training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function which takes a test case and returns a perturbed image that maximizes the probability of the adversarial label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
     model = Model(Ipopt.Optimizer)
     set_silent(model)
     @variable(model, 0 <= x[1:28, 1:28] <= 1)
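The rest of the function body is elided by the diff. A minimal sketch of how the formulation might be completed, assuming that MathOptAI.add_predictor accepts the trained Flux predictor directly and returns the output variables first (a hypothetical reconstruction; names and keyword options may differ from the tutorial):

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    @variable(model, 0 <= x[1:28, 1:28] <= 1)
    # Stay within δ of the original image in every pixel.
    @constraint(model, -δ .<= x .- test_case.features .<= δ)
    # Embed the trained network; y is the vector of output probabilities.
    y, _ = MathOptAI.add_predictor(model, predictor, vec(x))
    # Labels are 0-9, so label k corresponds to output index k + 1.
    @objective(model, Max, y[adversary_label+1])
    optimize!(model)
    return value.(x)
end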
@@ -122,4 +122,4 @@
 Plots.plot(
     plot_image(predictor, test_data[3].features),
     plot_image(predictor, Float32.(x_adversary)),
-)
Example block output

+)Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/mnist_lux/2be4fa28.svg b/dev/tutorials/mnist_lux/51e7e4a5.svg
similarity index 86%
rename from dev/tutorials/mnist_lux/2be4fa28.svg
rename to dev/tutorials/mnist_lux/51e7e4a5.svg
index e7ff2936..badf08b0 100644
[SVG plot markup omitted]

diff --git a/dev/tutorials/mnist/e6e568ce.svg b/dev/tutorials/mnist_lux/65060d29.svg
similarity index 61%
rename from dev/tutorials/mnist/e6e568ce.svg
rename to dev/tutorials/mnist_lux/65060d29.svg
index 8a188f08..77b7914e 100644
[SVG plot markup omitted]

diff --git a/dev/tutorials/mnist_lux/index.html b/dev/tutorials/mnist_lux/index.html
--- a/dev/tutorials/mnist_lux/index.html
+++ b/dev/tutorials/mnist_lux/index.html

+Plots.plot([plot_image(train_data[i]) for i in 1:6]...; layout = (2, 3))
Example block output

Training

We use a simple neural network with one hidden layer and a sigmoid activation function. (There are better-performing networks; try experimenting.)

chain = Lux.Chain(
     Lux.Dense(28^2 => 32, Lux.sigmoid),
     Lux.Dense(32 => 10),
     Lux.softmax,
@@ -54,8 +54,8 @@
     println("Accuracy = $p %")
     return
 end
score_model (generic function with 1 method)

The accuracy of our model is only around 10% before training, roughly what random guessing over the ten digit classes would achieve:

score_model(predictor, train_data)
-score_model(predictor, test_data)
Accuracy = 14.22 %
-Accuracy = 14.49 %

+score_model(predictor, test_data)
Accuracy = 9.91 %
+Accuracy = 10.09 %

Let's improve that by training our model.

Note

It is not the purpose of this tutorial to explain how Lux works; see the documentation at https://lux.csail.mit.edu for more details. Changing the number of epochs or the learning rate can improve the loss.

begin
     train_loader = data_loader(train_data; batchsize = 256, shuffle = true)
     optimizer_state = Optimisers.setup(Optimisers.Adam(0.0003f0), parameters)
     for epoch in 1:30
@@ -74,36 +74,36 @@
         print("Epoch $epoch: loss = $loss\t")
         score_model(predictor, test_data)
     end
-end
Epoch 1: loss = 1.8316	Accuracy = 76.18 %
-Epoch 2: loss = 1.1986	Accuracy = 85.17 %
-Epoch 3: loss = 0.879	Accuracy = 87.43 %
-Epoch 4: loss = 0.6923	Accuracy = 88.85 %
-Epoch 5: loss = 0.5758	Accuracy = 89.62 %
-Epoch 6: loss = 0.4986	Accuracy = 90.23 %
-Epoch 7: loss = 0.4446	Accuracy = 90.8 %
-Epoch 8: loss = 0.4046	Accuracy = 91.15 %
-Epoch 9: loss = 0.3744	Accuracy = 91.39 %
-Epoch 10: loss = 0.3499	Accuracy = 91.77 %
-Epoch 11: loss = 0.3305	Accuracy = 91.87 %
-Epoch 12: loss = 0.3143	Accuracy = 92.08 %
-Epoch 13: loss = 0.3003	Accuracy = 92.25 %
-Epoch 14: loss = 0.2887	Accuracy = 92.42 %
-Epoch 15: loss = 0.278	Accuracy = 92.63 %
-Epoch 16: loss = 0.268	Accuracy = 92.78 %
-Epoch 17: loss = 0.2594	Accuracy = 93.0 %
-Epoch 18: loss = 0.2522	Accuracy = 93.13 %
-Epoch 19: loss = 0.2447	Accuracy = 93.2 %
-Epoch 20: loss = 0.238	Accuracy = 93.3 %
-Epoch 21: loss = 0.2323	Accuracy = 93.46 %
-Epoch 22: loss = 0.2261	Accuracy = 93.61 %
-Epoch 23: loss = 0.2209	Accuracy = 93.67 %
-Epoch 24: loss = 0.2158	Accuracy = 93.84 %
-Epoch 25: loss = 0.2108	Accuracy = 93.91 %
-Epoch 26: loss = 0.2064	Accuracy = 94.04 %
-Epoch 27: loss = 0.2018	Accuracy = 94.1 %
-Epoch 28: loss = 0.1979	Accuracy = 94.22 %
-Epoch 29: loss = 0.1938	Accuracy = 94.25 %
-Epoch 30: loss = 0.1901	Accuracy = 94.37 %

+end
Epoch 1: loss = 1.7929	Accuracy = 78.65 %
+Epoch 2: loss = 1.1442	Accuracy = 84.75 %
+Epoch 3: loss = 0.8427	Accuracy = 87.34 %
+Epoch 4: loss = 0.6691	Accuracy = 88.76 %
+Epoch 5: loss = 0.5599	Accuracy = 89.49 %
+Epoch 6: loss = 0.4867	Accuracy = 90.26 %
+Epoch 7: loss = 0.4349	Accuracy = 90.69 %
+Epoch 8: loss = 0.3962	Accuracy = 91.0 %
+Epoch 9: loss = 0.3666	Accuracy = 91.18 %
+Epoch 10: loss = 0.3432	Accuracy = 91.48 %
+Epoch 11: loss = 0.324	Accuracy = 91.83 %
+Epoch 12: loss = 0.3077	Accuracy = 92.15 %
+Epoch 13: loss = 0.2937	Accuracy = 92.33 %
+Epoch 14: loss = 0.282	Accuracy = 92.53 %
+Epoch 15: loss = 0.2715	Accuracy = 92.69 %
+Epoch 16: loss = 0.2619	Accuracy = 92.84 %
+Epoch 17: loss = 0.2536	Accuracy = 93.07 %
+Epoch 18: loss = 0.2457	Accuracy = 93.1 %
+Epoch 19: loss = 0.239	Accuracy = 93.29 %
+Epoch 20: loss = 0.2326	Accuracy = 93.41 %
+Epoch 21: loss = 0.2264	Accuracy = 93.52 %
+Epoch 22: loss = 0.2217	Accuracy = 93.64 %
+Epoch 23: loss = 0.2157	Accuracy = 93.82 %
+Epoch 24: loss = 0.2108	Accuracy = 93.96 %
+Epoch 25: loss = 0.2062	Accuracy = 94.06 %
+Epoch 26: loss = 0.202	Accuracy = 94.28 %
+Epoch 27: loss = 0.1981	Accuracy = 94.32 %
+Epoch 28: loss = 0.1939	Accuracy = 94.46 %
+Epoch 29: loss = 0.1901	Accuracy = 94.55 %
+Epoch 30: loss = 0.1866	Accuracy = 94.65 %

Here are the first eight predictions of the test data:

function plot_image(predictor, x::Matrix)
     y, _ = chain(vec(x), parameters, state)
     score, index = findmax(y)
     title = "Predicted: $(index - 1) ($(round(Int, 100 * score))%)"
@@ -111,12 +111,12 @@
 end
 
 plots = [plot_image(predictor, test_data[i].features) for i in 1:8]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

We can also look at the best and worst four predictions:

x, y = only(data_loader(test_data; batchsize = length(test_data)))
 y_model, _ = chain(x, parameters, state)
 losses = Lux.CrossEntropyLoss(; agg = identity)(y_model, y)
 indices = sortperm(losses; dims = 2)[[1:4; end-3:end]]
 plots = [plot_image(predictor, test_data[i].features) for i in indices]
-Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

+Plots.plot(plots...; size = (1200, 600), layout = (2, 4))
Example block output

There are still some fairly bad mistakes. Can you change the model or the training parameters to improve things?

JuMP

Now that we have a trained machine learning model, we can embed it in a JuMP model.

Here's a function which takes a test case and returns a perturbed image that maximizes the probability of the adversarial label.

function find_adversarial_image(test_case; adversary_label, δ = 0.05)
     model = Model(Ipopt.Optimizer)
     set_silent(model)
     @variable(model, 0 <= x[1:28, 1:28] <= 1)
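As in the Flux tutorial, the rest of the function body is elided by the diff. The Lux-specific detail is that the network, its parameters, and its state travel together; a hypothetical sketch of the embedding step, assuming MathOptAI's Lux extension accepts this triple (not verbatim tutorial code):

# Embed the trained Lux network; y is the vector of output probabilities.
y, _ = MathOptAI.add_predictor(model, (chain, parameters, state), vec(x))
# Maximize the probability of the adversarial label, as in the Flux tutorial.
@objective(model, Max, y[adversary_label+1])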
@@ -132,4 +132,4 @@
 Plots.plot(
     plot_image(predictor, test_data[3].features),
     plot_image(predictor, Float32.(x_adversary)),
-)
Example block output

+)Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/pytorch/61c0955a.svg b/dev/tutorials/pytorch/ea38d5e3.svg
similarity index 82%
rename from dev/tutorials/pytorch/61c0955a.svg
rename to dev/tutorials/pytorch/ea38d5e3.svg
index aa70bea8..464297ad 100644
[SVG plot markup omitted]

diff --git a/dev/tutorials/pytorch/index.html b/dev/tutorials/pytorch/index.html
index cb43a7c8..38bb0c91 100644
--- a/dev/tutorials/pytorch/index.html
+++ b/dev/tutorials/pytorch/index.html
@@ -41,4 +41,4 @@
    push!(Y, objective_value(model))
end
Plots.plot(x -> x^2 - 2x, X; label = "Truth", linestyle = :dot)
-Plots.plot!(X, Y; label = "Fitted")Example block output

+Plots.plot!(X, Y; label = "Fitted")Example block output

This page was generated using Literate.jl.

diff --git a/dev/tutorials/student_enrollment/index.html b/dev/tutorials/student_enrollment/index.html
index 2ba7ea7e..1df54a28 100644
--- a/dev/tutorials/student_enrollment/index.html
+++ b/dev/tutorials/student_enrollment/index.html
@@ -138,7 +138,7 @@
  Dual objective value : -4.91918e+02

* Work counters
-  Solve time (sec)   : 6.06374e+00
+  Solve time (sec)   : 5.99427e+00
  Barrier iterations : 142

Let's store the solution in evaluate_df for analysis:

julia> evaluate_df.merit_sol = value.(evaluate_df.merit);
julia> evaluate_df.enroll_sol = value.(evaluate_df.enroll);
julia> evaluate_df
6000×7 DataFrame
  Row │ StudentID  SAT    GPA      merit          enroll           merit_sol   ⋯
      │ Int64      Int64  Float64  GenericV…      GenericV…        Float64     ⋯
@@ -159,4 +159,4 @@
 5998 │      5998   1165     2.81  x_merit[5998]  moai_Sigmoid[1]  1.21872     ⋯
 5999 │      5999   1400     3.43  x_merit[5999]  moai_Sigmoid[1]  1.33971e-6
 6000 │      6000   1097     2.65  x_merit[6000]  moai_Sigmoid[1]  1.17865
-                                                 1 column and 5985 rows omitted

+ 1 column and 5985 rows omitted

Solution analysis

We expect that just under 2,500 students will enroll:

julia> sum(evaluate_df.enroll_sol)
2488.1951671444017

We awarded merit scholarships to approximately 1 in 5 students:

julia> count(evaluate_df.merit_sol .> 1e-5)
1152
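As a quick sanity check of that fraction, computed from the count shown above:

julia> count(evaluate_df.merit_sol .> 1e-5) / length(evaluate_df.merit_sol)
0.192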

The average merit scholarship was worth just over $1,000:

julia> 1_000 * Statistics.mean(evaluate_df.merit_sol[evaluate_df.merit_sol.>1e-5])
1041.6622990671967

This page was generated using Literate.jl.