[SymForce] Partially clean up sym opt C++ docs
Topic: sym-opt-cpp-docs
Relative: sym-docs-gen-links
GitOrigin-RevId: e70ef4b29c8712dcd7960051af8649e262000fe2
aaron-skydio committed May 31, 2023
1 parent cce64be commit 0e0fafc
Showing 2 changed files with 103 additions and 91 deletions.
114 changes: 62 additions & 52 deletions symforce/opt/levenberg_marquardt_solver.h
@@ -35,81 +35,91 @@ namespace sym {
*
* Example usage:
*
* constexpr const int M = 9;
* constexpr const int N = 5;
*
* // Create a function that computes the residual (a linear residual for this example)
* const auto J_MN = sym::RandomNormalMatrix<double, M, N>(gen);
* const auto linearize_func = [&J_MN](const sym::Valuesd& values,
* sym::SparseLinearizationd* const linearization) {
* const auto state_vec = values.At<sym::Vector5d>('v');
* linearization->residual = J_MN * state_vec;
* linearization->hessian_lower = (J_MN.transpose() * J_MN).sparseView();
* linearization->jacobian = J_MN.sparseView();
* linearization->rhs = J_MN.transpose() * linearization->residual;
* };
*
* // Create a Values
* sym::Valuesd values_init{};
* values_init.Set('v', (StateVector::Ones() * 100).eval());
*
* // Create a Solver
* sym::LevenbergMarquardtSolverd solver(params, "", kEpsilon);
* solver.SetIndex(values_init.CreateIndex({'v'}));
* solver.Reset(values_init);
*
* // Iterate to convergence
* sym::optimization_stats_t stats;
* bool should_early_exit = false;
* while (!should_early_exit) {
* should_early_exit = solver.Iterate(linearize_func, &stats);
* }
*
* // Get the best values
* sym::Valuesd values_final = solver.GetBestValues();
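 *
 *     // The example assumes a few definitions not shown above; one plausible
 *     // sketch (illustrative names and values, not part of this header):
 *     #include <random>
 *     std::mt19937 gen(42);                                // RNG consumed by sym::RandomNormalMatrix
 *     constexpr double kEpsilon = 1e-9;                    // singularity guard passed to the solver
 *     sym::optimizer_params_t params = DefaultLmParams();  // tune as needed
 *     using StateVector = sym::Vector5d;                   // the state stored under key 'v'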
*
* The theory:
*
 * We start with a nonlinear vector-valued error function that defines an error residual whose
 * squared norm we want to minimize. The residual has dimension M and the state has dimension N:
*
* residual = f(x)
*
* Define a least squares cost function as the squared norm of the residual:
*
* e(x) = 0.5 * ||f(x)||**2 = 0.5 * f(x).T * f(x)
*
 * Take the first-order Taylor expansion of f(x) around the linearization point x0:
*
* f(x) = f(x0) + f'(x0) * (x - x0) + ...
*
* Plug in to the cost function to get a quadratic:
*
* e(x) ~= 0.5 * (x - x0).T * f'(x0).T * f'(x0) * (x - x0) + f(x0).T * f'(x0) * (x - x0)
* + 0.5 * f(x0).T * f(x0)
*
* Take derivative with respect to x:
*
* e'(x) = f'(x0).T * f'(x0) * (x - x0) + f'(x0).T * f(x0)
*
* Set to zero to find the minimum value of the quadratic (paraboloid):
*
* 0 = f'(x0).T * f'(x0) * (x - x0) + f'(x0).T * f(x0)
* (x - x0) = - inv(f'(x0).T * f'(x0)) * f'(x0).T * f(x0)
* x = x0 - inv(f'(x0).T * f'(x0)) * f'(x0).T * f(x0)
*
* Another way to write this is to create some helpful shorthand:
*
* f'(x0) --> jacobian or J (shape = MxN)
* f (x0) --> bias or b (shape = Mx1)
* x - x0 --> dx (shape = Nx1)
*
* Rederiving the Gauss-Newton solution:
*
* e(x) ~= 0.5 * dx.T * J.T * J * dx + b.T * J * dx + 0.5 * b.T * b
* e'(x) = J.T * J * dx + J.T * b
* x = x0 - inv(J.T * J) * J.T * b
*
* A couple more common names:
*
* f'(x0).T * f'(x0) = J.T * J --> hessian approximation or H (shape = NxN)
* f'(x0).T * f (x0) = J.T * b --> right hand side or rhs (shape = Nx1)
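 *
 * In the example at the top of this file, these quantities correspond to the
 * fields of sym::SparseLinearizationd as follows (the name hessian_lower
 * suggests that only the lower triangle is stored):
 *
 *     linearization->jacobian       ~ J        (MxN)
 *     linearization->residual      ~ b        (Mx1)
 *     linearization->hessian_lower ~ J.T * J  (NxN)
 *     linearization->rhs           ~ J.T * b  (Nx1)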
*
* So the iteration loop for optimization is:
*
* J, b = linearize(f, x0)
* dx = -inv(J.T * J) * J.T * b
* x_new = x0 + dx
*
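 * As a concrete illustration of the loop above, a minimal dense Gauss-Newton
 * step in Eigen (a sketch of the math, not of this solver's implementation):
 *
 *     // x_new = x0 - inv(J.T * J) * J.T * b, without forming the inverse
 *     Eigen::VectorXd GaussNewtonStep(const Eigen::MatrixXd& J,   // f'(x0), MxN
 *                                     const Eigen::VectorXd& b,   // f(x0),  Mx1
 *                                     const Eigen::VectorXd& x0) {
 *       const Eigen::MatrixXd H = J.transpose() * J;    // hessian approximation
 *       const Eigen::VectorXd rhs = J.transpose() * b;  // right hand side
 *       return x0 + H.ldlt().solve(-rhs);               // dx = -inv(H) * rhs
 *     }
 *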
 * Technically what we've just described is the Gauss-Newton algorithm; the Levenberg-Marquardt
 * algorithm is based on Gauss-Newton, but adds a damping term to J.T * J before inverting to
 * keep the system invertible and the step size under control.
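 *
 * One common form of that damping is (illustrative; the exact scheme used by
 * this solver is controlled by its params):
 *
 *     H_damped = J.T * J + lambda * diag(J.T * J)
 *     dx = -inv(H_damped) * J.T * b
 *
 * where lambda is typically scaled up when an iteration increases the error
 * and scaled down when it decreases the error.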
80 changes: 41 additions & 39 deletions symforce/opt/optimizer.h
@@ -17,53 +17,55 @@ namespace sym {

/**
* Class for optimizing a nonlinear least-squares problem specified as a list of Factors. For
* efficient use, create once and call Optimize() multiple times with different initial
* guesses, as long as the factors remain constant and the structure of the Values is identical.
*
* Not thread safe! Create one per thread.
*
* Example usage:
*
* // Create a Values
* sym::Key key0{'R', 0};
* sym::Key key1{'R', 1};
* sym::Valuesd values;
* values.Set(key0, sym::Rot3d::Identity());
* values.Set(key1, sym::Rot3d::Identity());
*
* // Create some factors
* std::vector<sym::Factord> factors;
* factors.push_back(sym::Factord::Jacobian(
* [epsilon](const sym::Rot3d& rot, Eigen::Vector3d* const res,
* Eigen::Matrix3d* const jac) {
* const sym::Rot3d prior = sym::Rot3d::Random();
* const Eigen::Matrix3d sqrt_info = Eigen::Vector3d::Ones().asDiagonal();
* sym::PriorFactorRot3<double>(rot, prior, sqrt_info, epsilon, res, jac);
* },
* {key0}));
* factors.push_back(sym::Factord::Jacobian(
* [epsilon](const sym::Rot3d& rot, Eigen::Vector3d* const res,
* Eigen::Matrix3d* const jac) {
* const sym::Rot3d prior = sym::Rot3d::Random();
* const Eigen::Matrix3d sqrt_info = Eigen::Vector3d::Ones().asDiagonal();
* sym::PriorFactorRot3<double>(rot, prior, sqrt_info, epsilon, res, jac);
* },
* {key1}));
* factors.push_back(sym::Factord::Jacobian(
* [epsilon](const sym::Rot3d& a, const sym::Rot3d& b, Eigen::Vector3d* const res,
* Eigen::Matrix<double, 3, 6>* const jac) {
* const Eigen::Matrix3d sqrt_info = Eigen::Vector3d::Ones().asDiagonal();
* const sym::Rot3d a_T_b = sym::Rot3d::Random();
* sym::BetweenFactorRot3<double>(a, b, a_T_b, sqrt_info, epsilon, res, jac);
* },
* {key0, key1}));
*
* // Set up the params
* sym::optimizer_params_t params = DefaultLmParams();
* params.iterations = 50;
* params.early_exit_min_reduction = 0.0001;
*
* // Optimize
* sym::Optimizer<double> optimizer(params, factors, epsilon);
* optimizer.Optimize(values);
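 *
 *     // The epsilon captured by the factor lambdas above is assumed to be
 *     // defined earlier, e.g.:
 *     const double epsilon = 1e-9;
 *
 *     // Assuming Optimize() updates values in place (it takes values mutably),
 *     // read the refined estimates back out:
 *     const sym::Rot3d optimized_R0 = values.At<sym::Rot3d>(key0);
 *     const sym::Rot3d optimized_R1 = values.At<sym::Rot3d>(key1);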
*
* See symforce/test/symforce_optimizer_test.cc for more examples
*/