diff --git a/include/boost/math/differentiation/autodiff_reverse.hpp b/include/boost/math/differentiation/autodiff_reverse.hpp index 57584a765a..4670b1d138 100644 --- a/include/boost/math/differentiation/autodiff_reverse.hpp +++ b/include/boost/math/differentiation/autodiff_reverse.hpp @@ -580,6 +580,8 @@ class rvar : public expressionbackward(); } + + void set_item(RealType value) { value_ = inner_t(value); } }; template diff --git a/include/boost/math/differentiation/detail/reverse_mode_autodiff_memory_management.hpp b/include/boost/math/differentiation/detail/reverse_mode_autodiff_memory_management.hpp index b0b5ab0555..d13a5aca11 100644 --- a/include/boost/math/differentiation/detail/reverse_mode_autodiff_memory_management.hpp +++ b/include/boost/math/differentiation/detail/reverse_mode_autodiff_memory_management.hpp @@ -23,407 +23,435 @@ namespace detail { template class flat_linear_allocator_iterator { - /** + /** * @brief enables iterating over linear allocator with * c++ iterators */ public: - using raw_allocator_type = std::remove_const_t; - using value_type = typename allocator_type::value_type; - using pointer = typename allocator_type::value_type *; - using const_ptr_type = const value_type *; - using reference = typename allocator_type::value_type &; - using const_reference_type = const value_type &; - using iterator_category = std::random_access_iterator_tag; - using difference_type = ptrdiff_t; + using raw_allocator_type = std::remove_const_t; + using value_type = typename allocator_type::value_type; + using pointer = typename allocator_type::value_type*; + using const_ptr_type = const value_type*; + using reference = typename allocator_type::value_type&; + using const_reference_type = const value_type&; + using iterator_category = std::random_access_iterator_tag; + using difference_type = ptrdiff_t; private: - const allocator_type *storage_ = nullptr; - size_t index_ = 0; - size_t begin_ = 0; - size_t end_ = 0; + const allocator_type* storage_ = nullptr; + size_t index_ = 0; + size_t begin_ = 0; + size_t end_ = 0; public: - flat_linear_allocator_iterator() = default; - - explicit flat_linear_allocator_iterator(allocator_type *storage, size_t index) - : storage_(storage) - , index_(index) - , begin_(0) - , end_(storage->size()) - {} - - explicit flat_linear_allocator_iterator(allocator_type *storage, - size_t index, - size_t begin, - size_t end) - : storage_(storage) - , index_(index) - , begin_(begin) - , end_(end) - {} - - explicit flat_linear_allocator_iterator(const allocator_type *storage, size_t index) - : storage_(storage) - , index_(index) - , begin_(0) - , end_(storage->size()) - {} - - explicit flat_linear_allocator_iterator(const allocator_type *storage, - size_t index, - size_t begin, - size_t end) - : storage_(storage) - , index_(index) - , begin_(begin) - , end_(end) - {} - reference operator*() - { - BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); - return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size]; - } - - const_reference_type operator*() const - { - BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); - return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size]; - } - - pointer operator->() - { - BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); - return &operator*(); - } - - const_ptr_type operator->() const - { - BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); - return &operator*(); - } - flat_linear_allocator_iterator &operator++() - { - ++index_; - return *this; - } - - flat_linear_allocator_iterator 
operator++(int) - { - auto tmp = *this; - ++(*this); - return tmp; - } - - flat_linear_allocator_iterator &operator--() - { - --index_; - return *this; - } - - flat_linear_allocator_iterator operator--(int) - { - auto tmp = *this; - --(*this); - return tmp; - } - - bool operator==(const flat_linear_allocator_iterator &other) const - { - return index_ == other.index_ && storage_ == other.storage_; - } - - bool operator!=(const flat_linear_allocator_iterator &other) const { return !(*this == other); } - - flat_linear_allocator_iterator operator+(difference_type n) const - { - return flat_linear_allocator_iterator(storage_, index_ + static_cast(n), begin_, end_); - } - - flat_linear_allocator_iterator &operator+=(difference_type n) - { - index_ += n; - return *this; - } - - flat_linear_allocator_iterator operator-(difference_type n) const - { - return flat_linear_allocator_iterator(storage_, index_ - n, begin_, end_); - } - flat_linear_allocator_iterator &operator-=(difference_type n) - { - index_ -= n; - return *this; - } - - difference_type operator-(const flat_linear_allocator_iterator &other) const - { - return static_cast(index_) - static_cast(other.index_); - } - - reference operator[](difference_type n) { return *(*this + n); } - - const_reference_type operator[](difference_type n) const { return *(*this + n); } - - bool operator<(const flat_linear_allocator_iterator &other) const - { - return index_ < other.index_; - } - - bool operator>(const flat_linear_allocator_iterator &other) const - { - return index_ > other.index_; - } - - bool operator<=(const flat_linear_allocator_iterator &other) const - { - return index_ <= other.index_; - } - - bool operator>=(const flat_linear_allocator_iterator &other) const - { - return index_ >= other.index_; - } - - bool operator!() const noexcept { return storage_ == nullptr; } + flat_linear_allocator_iterator() = default; + + explicit flat_linear_allocator_iterator(allocator_type* storage, size_t index) + : storage_(storage) + , index_(index) + , begin_(0) + , end_(storage->size()) + { + } + + explicit flat_linear_allocator_iterator(allocator_type* storage, + size_t index, + size_t begin, + size_t end) + : storage_(storage) + , index_(index) + , begin_(begin) + , end_(end) + { + } + + explicit flat_linear_allocator_iterator(const allocator_type* storage, + size_t index) + : storage_(storage) + , index_(index) + , begin_(0) + , end_(storage->size()) + { + } + + explicit flat_linear_allocator_iterator(const allocator_type* storage, + size_t index, + size_t begin, + size_t end) + : storage_(storage) + , index_(index) + , begin_(begin) + , end_(end) + { + } + reference operator*() + { + BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); + return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size]; + } + + const_reference_type operator*() const + { + BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); + return (*storage_->data_[index_ / buffer_size])[index_ % buffer_size]; + } + + pointer operator->() + { + BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); + return &operator*(); + } + + const_ptr_type operator->() const + { + BOOST_MATH_ASSERT(index_ >= begin_ && index_ < end_); + return &operator*(); + } + flat_linear_allocator_iterator& operator++() + { + ++index_; + return *this; + } + + flat_linear_allocator_iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + + flat_linear_allocator_iterator& operator--() + { + --index_; + return *this; + } + + flat_linear_allocator_iterator operator--(int) + { + 
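+    // post-decrement: save a copy, step back via pre-decrement, return the
+    // saved iterator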
auto tmp = *this; + --(*this); + return tmp; + } + + bool operator==(const flat_linear_allocator_iterator& other) const + { + return index_ == other.index_ && storage_ == other.storage_; + } + + bool operator!=(const flat_linear_allocator_iterator& other) const + { + return !(*this == other); + } + + flat_linear_allocator_iterator operator+(difference_type n) const + { + return flat_linear_allocator_iterator( + storage_, index_ + static_cast(n), begin_, end_); + } + + flat_linear_allocator_iterator& operator+=(difference_type n) + { + index_ += n; + return *this; + } + + flat_linear_allocator_iterator operator-(difference_type n) const + { + return flat_linear_allocator_iterator(storage_, index_ - n, begin_, end_); + } + flat_linear_allocator_iterator& operator-=(difference_type n) + { + index_ -= n; + return *this; + } + + difference_type operator-(const flat_linear_allocator_iterator& other) const + { + return static_cast(index_) - + static_cast(other.index_); + } + + reference operator[](difference_type n) { return *(*this + n); } + + const_reference_type operator[](difference_type n) const + { + return *(*this + n); + } + + bool operator<(const flat_linear_allocator_iterator& other) const + { + return index_ < other.index_; + } + + bool operator>(const flat_linear_allocator_iterator& other) const + { + return index_ > other.index_; + } + + bool operator<=(const flat_linear_allocator_iterator& other) const + { + return index_ <= other.index_; + } + + bool operator>=(const flat_linear_allocator_iterator& other) const + { + return index_ >= other.index_; + } + + bool operator!() const noexcept { return storage_ == nullptr; } }; /* memory management helps for tape */ template class flat_linear_allocator { - /** @brief basically a vector> + /** @brief basically a vector> * intended to work like a vector that allocates memory in chunks * and doesn't invalidate references * */ public: - // store vector of unique pointers to arrays - // to avoid vector reference invalidation - using buffer_type = std::array; - using buffer_ptr = std::unique_ptr>; + // store vector of unique pointers to arrays + // to avoid vector reference invalidation + using buffer_type = std::array; + using buffer_ptr = std::unique_ptr>; private: - std::vector data_; - size_t total_size_ = 0; - std::vector checkpoints_; //{0}; + std::vector data_; + size_t total_size_ = 0; + std::vector checkpoints_; //{0}; public: - friend class flat_linear_allocator_iterator, - buffer_size>; - friend class flat_linear_allocator_iterator, - buffer_size>; - using value_type = RealType; - using iterator - = flat_linear_allocator_iterator, buffer_size>; - using const_iterator - = flat_linear_allocator_iterator, - buffer_size>; - - size_t buffer_id() const noexcept { return total_size_ / buffer_size; } - size_t item_id() const noexcept { return total_size_ % buffer_size; } + friend class flat_linear_allocator_iterator< + flat_linear_allocator, + buffer_size>; + friend class flat_linear_allocator_iterator< + const flat_linear_allocator, + buffer_size>; + using value_type = RealType; + using iterator = + flat_linear_allocator_iterator, + buffer_size>; + using const_iterator = flat_linear_allocator_iterator< + const flat_linear_allocator, + buffer_size>; + + size_t buffer_id() const noexcept { return total_size_ / buffer_size; } + size_t item_id() const noexcept { return total_size_ % buffer_size; } private: - void allocate_buffer() - { - data_.emplace_back(std::make_unique()); - } + void allocate_buffer() + { + 
data_.emplace_back(std::make_unique()); + } public: - flat_linear_allocator() { allocate_buffer(); } - flat_linear_allocator(const flat_linear_allocator &) = delete; - flat_linear_allocator &operator=(const flat_linear_allocator &) = delete; - flat_linear_allocator(flat_linear_allocator &&) = delete; - flat_linear_allocator &operator=(flat_linear_allocator &&) = delete; - ~flat_linear_allocator() - { - destroy_all(); - data_.clear(); - } - - void destroy_all() - { - for (size_t i = 0; i < total_size_; ++i) { - size_t bid = i / buffer_size; - size_t iid = i % buffer_size; - (*data_[bid])[iid].~RealType(); - } + flat_linear_allocator() { allocate_buffer(); } + flat_linear_allocator(const flat_linear_allocator&) = delete; + flat_linear_allocator& operator=(const flat_linear_allocator&) = delete; + flat_linear_allocator(flat_linear_allocator&&) = delete; + flat_linear_allocator& operator=(flat_linear_allocator&&) = delete; + ~flat_linear_allocator() + { + destroy_all(); + data_.clear(); + } + + void destroy_all() + { + for (size_t i = 0; i < total_size_; ++i) { + size_t bid = i / buffer_size; + size_t iid = i % buffer_size; + (*data_[bid])[iid].~RealType(); } - /** @brief + } + /** @brief * helper functions to clear tape and create block in tape */ - void clear() - { - data_.clear(); - total_size_ = 0; - checkpoints_.clear(); - allocate_buffer(); - } - - // doesn't delete anything, only sets the current index to zero - void reset() { total_size_ = 0; } - void rewind() { total_size_ = 0; }; - - // adds current index as a checkpoint to be able to walk back to - void add_checkpoint() - { - if (total_size_ > 0) { - checkpoints_.push_back(total_size_ - 1); - } else { - checkpoints_.push_back(0); - } - }; - - /** @brief clears all checkpoints + void clear() + { + data_.clear(); + total_size_ = 0; + checkpoints_.clear(); + allocate_buffer(); + } + + // doesn't delete anything, only sets the current index to zero + void reset() { total_size_ = 0; } + void rewind() { total_size_ = 0; }; + + // adds current index as a checkpoint to be able to walk back to + void add_checkpoint() + { + checkpoints_.push_back(total_size_); + /* + * if (total_size_ > 0) { + checkpoints_.push_back(total_size_ - 1); + } else { + checkpoints_.push_back(0); + }*/ + }; + + /** @brief clears all checkpoints * */ - void reset_checkpoints() { checkpoints_.clear(); } - - void rewind_to_last_checkpoint() { total_size_ = checkpoints_.back(); } - void rewind_to_checkpoint_at(size_t index) { total_size_ = checkpoints_[index]; } - - void fill(const RealType &val) - { - for (size_t i = 0; i < total_size_; ++i) { - size_t bid = i / buffer_size; - size_t iid = i % buffer_size; - (*data_[bid])[iid] = val; - } + void reset_checkpoints() { checkpoints_.clear(); } + + void rewind_to_last_checkpoint() { total_size_ = checkpoints_.back(); } + void rewind_to_checkpoint_at(size_t index) + { + total_size_ = checkpoints_[index]; + } + + void fill(const RealType& val) + { + for (size_t i = 0; i < total_size_; ++i) { + size_t bid = i / buffer_size; + size_t iid = i % buffer_size; + (*data_[bid])[iid] = val; } + } - /** @brief emplaces back object at the end of the + /** @brief emplaces back object at the end of the * data structure, calls default constructor */ - iterator emplace_back() - { - if (item_id() == 0 && total_size_ != 0) { - allocate_buffer(); - } - size_t bid = buffer_id(); - size_t iid = item_id(); - - RealType *ptr = &(*data_[bid])[iid]; - new (ptr) RealType(); - ++total_size_; - return iterator(this, total_size_ - 1); - }; - - /** 
@brief, emplaces back object at end of data structure, + iterator emplace_back() + { + if (item_id() == 0 && total_size_ != 0) { + allocate_buffer(); + } + size_t bid = buffer_id(); + size_t iid = item_id(); + + RealType* ptr = &(*data_[bid])[iid]; + new (ptr) RealType(); + ++total_size_; + return iterator(this, total_size_ - 1); + }; + + /** @brief, emplaces back object at end of data structure, * passes arguments to constructor */ - template - iterator emplace_back(Args &&...args) - { - if (item_id() == 0 && total_size_ != 0) { - allocate_buffer(); - } - BOOST_MATH_ASSERT(buffer_id() < data_.size()); - BOOST_MATH_ASSERT(item_id() < buffer_size); - RealType *ptr = &(*data_[buffer_id()])[item_id()]; - new (ptr) RealType(std::forward(args)...); - ++total_size_; - return iterator(this, total_size_ - 1); + template + iterator emplace_back(Args&&... args) + { + if (item_id() == 0 && total_size_ != 0) { + allocate_buffer(); } - /** @brief default constructs n objects at end of + BOOST_MATH_ASSERT(buffer_id() < data_.size()); + BOOST_MATH_ASSERT(item_id() < buffer_size); + RealType* ptr = &(*data_[buffer_id()])[item_id()]; + new (ptr) RealType(std::forward(args)...); + ++total_size_; + return iterator(this, total_size_ - 1); + } + /** @brief default constructs n objects at end of * data structure, n known at compile time */ - template - iterator emplace_back_n() - { - size_t bid = buffer_id(); - size_t iid = item_id(); - if (iid + n < buffer_size) { - RealType *ptr = &(*data_[bid])[iid]; - for (size_t i = 0; i < n; ++i) { - new (ptr + i) RealType(); - } - total_size_ += n; - return iterator(this, total_size_ - n, total_size_ - n, total_size_); - } else { - size_t allocs_in_curr_buffer = buffer_size - iid; - size_t allocs_in_next_buffer = n - (buffer_size - iid); - RealType *ptr = &(*data_[bid])[iid]; - for (size_t i = 0; i < allocs_in_curr_buffer; ++i) { - new (ptr + i) RealType(); - } - allocate_buffer(); - bid = data_.size() - 1; - iid = 0; - total_size_ += n; - - RealType *ptr2 = &(*data_[bid])[iid]; - for (size_t i = 0; i < allocs_in_next_buffer; i++) { - new (ptr2 + i) RealType(); - } - return iterator(this, total_size_ - n, total_size_ - n, total_size_); - } + template + iterator emplace_back_n() + { + size_t bid = buffer_id(); + size_t iid = item_id(); + if (iid + n < buffer_size) { + RealType* ptr = &(*data_[bid])[iid]; + for (size_t i = 0; i < n; ++i) { + new (ptr + i) RealType(); + } + total_size_ += n; + return iterator(this, total_size_ - n, total_size_ - n, total_size_); + } else { + size_t allocs_in_curr_buffer = buffer_size - iid; + size_t allocs_in_next_buffer = n - (buffer_size - iid); + RealType* ptr = &(*data_[bid])[iid]; + for (size_t i = 0; i < allocs_in_curr_buffer; ++i) { + new (ptr + i) RealType(); + } + allocate_buffer(); + bid = data_.size() - 1; + iid = 0; + total_size_ += n; + + RealType* ptr2 = &(*data_[bid])[iid]; + for (size_t i = 0; i < allocs_in_next_buffer; i++) { + new (ptr2 + i) RealType(); + } + return iterator(this, total_size_ - n, total_size_ - n, total_size_); } - /** @brief default constructs n objects at end of + } + /** @brief default constructs n objects at end of * data structure, n known at run time */ - iterator emplace_back_n(size_t n) - { - size_t bid = buffer_id(); - size_t iid = item_id(); - if (iid + n < buffer_size) { - RealType *ptr = &(*data_[bid])[iid]; - for (size_t i = 0; i < n; ++i) { - new (ptr + i) RealType(); - } - total_size_ += n; - return iterator(this, total_size_ - n, total_size_ - n, total_size_); - } else { - size_t 
allocs_in_curr_buffer = buffer_size - iid; - size_t allocs_in_next_buffer = n - (buffer_size - iid); - RealType *ptr = &(*data_[bid])[iid]; - for (size_t i = 0; i < allocs_in_curr_buffer; ++i) { - new (ptr + i) RealType(); - } - allocate_buffer(); - bid = data_.size() - 1; - iid = 0; - total_size_ += n; - - RealType *ptr2 = &(*data_[bid])[iid]; - for (size_t i = 0; i < allocs_in_next_buffer; i++) { - new (ptr2 + i) RealType(); - } - return iterator(this, total_size_ - n, total_size_ - n, total_size_); - } + iterator emplace_back_n(size_t n) + { + size_t bid = buffer_id(); + size_t iid = item_id(); + if (iid + n < buffer_size) { + RealType* ptr = &(*data_[bid])[iid]; + for (size_t i = 0; i < n; ++i) { + new (ptr + i) RealType(); + } + total_size_ += n; + return iterator(this, total_size_ - n, total_size_ - n, total_size_); + } else { + size_t allocs_in_curr_buffer = buffer_size - iid; + size_t allocs_in_next_buffer = n - (buffer_size - iid); + RealType* ptr = &(*data_[bid])[iid]; + for (size_t i = 0; i < allocs_in_curr_buffer; ++i) { + new (ptr + i) RealType(); + } + allocate_buffer(); + bid = data_.size() - 1; + iid = 0; + total_size_ += n; + + RealType* ptr2 = &(*data_[bid])[iid]; + for (size_t i = 0; i < allocs_in_next_buffer; i++) { + new (ptr2 + i) RealType(); + } + return iterator(this, total_size_ - n, total_size_ - n, total_size_); } - - /** @brief number of elements */ - size_t size() const { return total_size_; } - - /** @brief total capacity */ - size_t capacity() const { return data_.size() * buffer_size; } - - /** @brief iterator helpers */ - iterator begin() { return iterator(this, 0); } - iterator end() { return iterator(this, total_size_); } - const_iterator begin() const { return const_iterator(this, 0); } - const_iterator end() const { return const_iterator(this, total_size_); } - - iterator last_checkpoint() { return iterator(this, checkpoints_.back(), 0, total_size_); } - iterator first_checkpoint() { return iterator(this, checkpoints_[0], 0, total_size_); }; - iterator checkpoint_at(size_t index) - { - return iterator(this, checkpoints_[index], 0, total_size_); - }; - - /** @brief searches for item in allocator + } + + /** @brief number of elements */ + size_t size() const { return total_size_; } + + /** @brief total capacity */ + size_t capacity() const { return data_.size() * buffer_size; } + + /** @brief iterator helpers */ + iterator begin() { return iterator(this, 0); } + iterator end() { return iterator(this, total_size_); } + const_iterator begin() const { return const_iterator(this, 0); } + const_iterator end() const { return const_iterator(this, total_size_); } + + iterator last_checkpoint() + { + return iterator(this, checkpoints_.back(), 0, total_size_); + } + iterator first_checkpoint() + { + return iterator(this, checkpoints_[0], 0, total_size_); + }; + iterator checkpoint_at(size_t index) + { + return iterator(this, checkpoints_[index], 0, total_size_); + }; + + /** @brief searches for item in allocator * only used to find gradient nodes for propagation */ - iterator find(const RealType *const item) - { - return std::find_if(begin(), end(), [&](const RealType &val) { return &val == item; }); - } - /** @brief vector like access, + iterator find(const RealType* const item) + { + return std::find_if( + begin(), end(), [&](const RealType& val) { return &val == item; }); + } + /** @brief vector like access, * currently unused anywhere but very useful for debugging */ - RealType &operator[](std::size_t i) - { - BOOST_MATH_ASSERT(i < total_size_); - return 
(*data_[i / buffer_size])[i % buffer_size]; - } - const RealType &operator[](std::size_t i) const - { - BOOST_MATH_ASSERT(i < total_size_); - return (*data_[i / buffer_size])[i % buffer_size]; - } + RealType& operator[](std::size_t i) + { + BOOST_MATH_ASSERT(i < total_size_); + return (*data_[i / buffer_size])[i % buffer_size]; + } + const RealType& operator[](std::size_t i) const + { + BOOST_MATH_ASSERT(i < total_size_); + return (*data_[i / buffer_size])[i % buffer_size]; + } }; } // namespace detail } // namespace reverse_mode diff --git a/include/boost/math/optimization/detail/differentiable_opt_utilties.hpp b/include/boost/math/optimization/detail/differentiable_opt_utilties.hpp new file mode 100644 index 0000000000..9e7e989ef7 --- /dev/null +++ b/include/boost/math/optimization/detail/differentiable_opt_utilties.hpp @@ -0,0 +1,166 @@ +// Copyright Maksym Zhelyenzyakov 2025-2026. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// https://www.boost.org/LICENSE_1_0.txt) +#ifndef DIFFERENTIABLE_OPT_UTILITIES_HPP +#define DIFFERENTIABLE_OPT_UTILITIES_HPP +#include +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace math { +namespace optimization { + +namespace rdiff = boost::math::differentiation::reverse_mode; + +/** @brief> helper to get the underlying realtype from + * update policy + * */ +template +struct update_policy_real_type; + +template class UpdPol, typename RealType> +struct update_policy_real_type> +{ + using type = RealType; +}; + +template +using update_policy_real_type_t = + typename update_policy_real_type::type>::type; + +/** @brief> get realtype from argument container + * */ +template +struct argument_container_t +{ + using type = typename argument_container_t::type>::type; +}; + +template +struct argument_container_t> +{ + using type = ValueType; +}; + +template +struct argument_container_t, N>> +{ + using type = RealType; +}; + +template class Container, typename ValueType, typename... Args> +struct argument_container_t> +{ + using type = ValueType; +}; + +template class Container, + typename RealType, + std::size_t N, + typename... 
Args>
+struct argument_container_t, Args...>>
+{
+  using type = RealType;
+};
+/******************************************************************************/
+/** @brief simple BLAS helpers
+ * may optimize later if benchmarks show it's needed, or just switch to Eigen
+ */
+template <typename Container>
+auto
+dot(const Container& x, const Container& y) -> typename Container::value_type
+{
+  using T = typename Container::value_type;
+  BOOST_MATH_ASSERT(x.size() == y.size());
+  return std::inner_product(x.begin(), x.end(), y.begin(), T(0));
+}
+
+template <typename Container>
+auto
+norm_2(const Container& x) -> typename Container::value_type
+{
+  using std::sqrt; // unqualified call allows ADL for user-defined scalars
+  return sqrt(dot(x, x));
+}
+
+template <typename Container>
+auto
+norm_1(const Container& x) -> typename Container::value_type
+{
+  using T = typename Container::value_type;
+  using std::abs; // unqualified call allows ADL for user-defined scalars
+  T ret{ 0 };
+  for (auto& xi : x) {
+    ret += abs(xi);
+  }
+  return ret;
+}
+
+template <typename T>
+T
+norm_inf(const std::vector<T>& x)
+{
+  BOOST_MATH_ASSERT(!x.empty());
+  using std::abs;
+
+  T max_val = abs(x[0]);
+  const std::size_t n = x.size();
+
+  for (std::size_t i = 1; i < n; ++i) {
+    const T abs_val = abs(x[i]);
+    if (abs_val > max_val)
+      max_val = abs_val;
+  }
+  return max_val;
+}
+/** @brief alpha*x (alpha is a scalar, x is a vector) */
+template <typename Container, typename RealType>
+void
+scale(Container& x, const RealType& alpha)
+{
+  for (auto& xi : x) {
+    xi *= alpha;
+  }
+}
+
+/** @brief y += alpha * x
+ */
+template <typename RealType, typename ContainerX, typename ContainerY>
+void
+axpy(RealType alpha, const ContainerX& x, ContainerY& y)
+{
+  BOOST_MATH_ASSERT(x.size() == y.size());
+  const size_t n = x.size();
+  for (size_t i = 0; i < n; ++i) {
+    y[i] += alpha * x[i];
+  }
+}
+/******************************************************************************/
+template <typename RealType>
+std::vector<RealType>
+random_vector(size_t n)
+{
+  /** @brief generates a random std::vector of size n
+   * using the mt19937 algorithm
+   */
+
+  /** TODO: these may need to be marked thread_local
+   * in the future
+   *
+   * TODO: benchmark.
+   */
+  static boost::random::mt19937 rng{ std::random_device{}() };
+  static boost::random::uniform_real_distribution<RealType> dist(0.0, 1.0);
+
+  std::vector<RealType> result(n);
+  std::generate(result.begin(), result.end(), [&] { return dist(rng); });
+  return result;
+}
+
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif
diff --git a/include/boost/math/optimization/detail/gradient_opt_base.hpp b/include/boost/math/optimization/detail/gradient_opt_base.hpp
new file mode 100644
index 0000000000..58fd3ac2bc
--- /dev/null
+++ b/include/boost/math/optimization/detail/gradient_opt_base.hpp
@@ -0,0 +1,92 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef GRADIENT_OPT_BASE_HPP
+#define GRADIENT_OPT_BASE_HPP
+#include
+
+namespace boost {
+namespace math {
+namespace optimization {
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+
+/**
+ * @brief The abstract_optimizer class implements the variables
+ * and methods shared across optimizers
+ *
+ * @tparam ArgumentContainer container of optimization arguments
+ * @tparam RealType scalar floating-point type
+ * @tparam Objective objective function type
+ * @tparam InitializationPolicy how to initialize the problem
+ * @tparam ObjectiveEvalPolicy how to evaluate the objective
+ * @tparam GradEvalPolicy how to evaluate gradients
+ * @tparam UpdatePolicy how to update the arguments each step
+ * @tparam DerivedOptimizer the concrete optimizer (CRTP)
+ */
+
+template <typename ArgumentContainer,
+          typename RealType,
+          typename Objective,
+          typename InitializationPolicy,
+          typename ObjectiveEvalPolicy,
+          typename GradEvalPolicy,
+          typename UpdatePolicy,
+          typename DerivedOptimizer>
+class abstract_optimizer
+{
+protected:
+  Objective objective_;          // objective function
+  ArgumentContainer& x_;         // arguments to objective function
+  std::vector<RealType> g_;      // gradient values, one per argument
+  ObjectiveEvalPolicy obj_eval_; // how to evaluate the objective function
+  GradEvalPolicy grad_eval_;     // how to evaluate/bind gradients
+  InitializationPolicy init_;    // how to initialize the problem
+  UpdatePolicy update_;          // update step
+  RealType obj_v_;               // objective value (for history)
+  // access derived class
+  DerivedOptimizer& derived() { return static_cast<DerivedOptimizer&>(*this); }
+  const DerivedOptimizer& derived() const
+  {
+    return static_cast<const DerivedOptimizer&>(*this);
+  }
+
+  void step_impl()
+  {
+    grad_eval_(objective_, x_, obj_eval_, obj_v_, g_);
+    for (size_t i = 0; i < x_.size(); ++i) {
+      update_(x_[i], g_[i]);
+    }
+  }
+
+public:
+  using argument_container_t = ArgumentContainer;
+  using real_type_t = RealType;
+
+  abstract_optimizer(Objective&& objective,
+                     ArgumentContainer& x,
+                     InitializationPolicy&& ip,
+                     ObjectiveEvalPolicy&& oep,
+                     GradEvalPolicy&& gep,
+                     UpdatePolicy&& up)
+    : objective_(std::forward<Objective>(objective))
+    , x_(x)
+    , obj_eval_(std::forward<ObjectiveEvalPolicy>(oep))
+    , grad_eval_(std::forward<GradEvalPolicy>(gep))
+    , init_(std::forward<InitializationPolicy>(ip))
+    , update_(std::forward<UpdatePolicy>(up))
+  {
+    init_(x_);            // initialize the problem
+    g_.resize(x_.size()); // initialize space for gradients
+  }
+
+  ArgumentContainer& arguments() { return derived().x_; }
+  const ArgumentContainer& arguments() const { return derived().x_; }
+
+  RealType& objective_value() { return derived().obj_v_; }
+  const RealType& objective_value() const { return derived().obj_v_; }
+  std::vector<RealType>& gradients() { return derived().g_; }
+  const std::vector<RealType>& gradients() const { return derived().g_; }
+};
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif
diff --git a/include/boost/math/optimization/detail/line_search_policies.hpp b/include/boost/math/optimization/detail/line_search_policies.hpp
new file mode 100644
index 0000000000..0dd85867b4
--- /dev/null
+++ b/include/boost/math/optimization/detail/line_search_policies.hpp
@@ -0,0 +1,208 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef LINE_SEARCH_POLICIES_HPP
+#define LINE_SEARCH_POLICIES_HPP
+
+#include
+#include
+#include
+#include
+#include
+namespace boost {
+namespace math {
+namespace optimization {
+
+/**
+ * @brief Armijo condition backtracking line search
+ * https://en.wikipedia.org/wiki/Backtracking_line_search
+ *
+ * f(x + alpha*p) <= f(x) + alpha * c * grad(f)^T p
+ *
+ * Jorge Nocedal and Stephen J. Wright,
+ * Numerical Optimization, 2nd Edition,
+ * Springer, 2006.
+ * + * Algorithm 3.1: Backtracking Line Search + * (Page 37) + */ +template +class armijo_line_search_policy +{ +private: + RealType alpha0_; // initial step size + RealType c_; // sufficient decrease constant + RealType rho_; // backtracking factor + int max_iter_; // maximum backtracking steps + +public: + armijo_line_search_policy(RealType alpha0 = 1.0, + RealType c = 1e-4, + RealType rho = 0.5, + int max_iter = 20) + : alpha0_(alpha0) + , c_(c) + , rho_(rho) + , max_iter_(max_iter) + { + } + + template + RealType operator()(Objective& objective, + ObjectiveEvalPolicy& obj_eval, + GradientEvalPolicy& grad_eval, + ArgumentContainer& x, + const std::vector& g, + const std::vector& p, + RealType f_x) const + { + /** @brief> line search + * */ + RealType alpha = alpha0_; + ArgumentContainer x_trial; // = x; // copy + const RealType gTp = dot(g, p); + + for (int iter = 0; iter < max_iter_; ++iter) { + x_trial = x; + axpy(alpha, p, x_trial); + auto f_trial = obj_eval(objective, x_trial); + if (f_trial <= + f_x + c_ * alpha * gTp) // check if armijo condition is satisfied + return alpha; + alpha *= rho_; + } + return alpha; + } +}; + +/** @brief> Strong-Wolfe line search: + * Jorge Nocedal and Stephen J. Wright, + * Numerical Optimization, 2nd Edition, + * Springer, 2006. + * + * Algorithm 3.5 Line Search Algorithm (Strong Wolfe Conditions) + * pages 60-61 + */ +template +class strong_wolfe_line_search_policy +{ +private: + RealType alpha0_; // initial step size + RealType c1_; // Armijo constant (sufficient decrease) + RealType c2_; // curvature constant + RealType rho_; // backtracking factor + int max_iter_; // maximum iterations +public: + strong_wolfe_line_search_policy(RealType alpha0 = 1.0, + RealType c1 = 1e-4, + RealType c2 = 0.9, + RealType rho = 2.0, + int max_iter = 20) + : alpha0_(alpha0) + , c1_(c1) + , c2_(c2) + , rho_(rho) + , max_iter_(max_iter) + { + } + template + RealType operator()(Objective& objective, + ObjectiveEvalPolicy& obj_eval, + GradientEvalPolicy& grad_eval, + ArgumentContainer& x, + const std::vector& g, + const std::vector& p, + RealType f_x) const + { + RealType gTp0 = dot(g, p); + RealType alpha_prev{0}; + RealType f_prev = f_x; + RealType alpha = alpha0_; + + ArgumentContainer x_trial; + std::vector g_trial(g.size()); + for (int i = 0; i < max_iter_; ++i) { + x_trial = x; // explicit copy + axpy(alpha, p, x_trial); + RealType f_trial = static_cast(obj_eval(objective, x_trial)); + if ((f_trial > f_x + c1_ * alpha * gTp0) || + (i > 0 && f_trial >= f_prev)) { + return zoom( + objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha_prev, alpha); + } + grad_eval(objective, x_trial, obj_eval, f_trial, g_trial); + RealType gTp = dot(g_trial, p); + + if (fabs(gTp) <= c2_ * fabs(gTp0)) { + return alpha; + } + + if (gTp >= 0) { + return zoom( + objective, obj_eval, grad_eval, x, p, f_x, gTp0, alpha, alpha_prev); + } + alpha_prev = alpha; + f_prev = f_trial; + alpha *= rho_; + } + + return alpha; + } + +private: + template + RealType zoom(Objective& objective, + ObjectiveEvalPolicy& obj_eval, + GradientEvalPolicy& grad_eval, + ArgumentContainer& x, + const std::vector& p, + RealType f_x, + RealType gTp0, + RealType alpha_lo, + RealType alpha_hi) const + { + ArgumentContainer x_trial; + std::vector g_trial(p.size()); + + for (int iter = 0; iter < max_iter_; ++iter) { + const RealType alpha_mid = 0.5 * (alpha_lo + alpha_hi); + x_trial = x; + axpy(alpha_mid, p, x_trial); + RealType f_mid; + grad_eval(objective, x_trial, obj_eval, f_mid, g_trial); + RealType gTp_mid = 
dot(g_trial, p);
+      ArgumentContainer x_lo = x;
+      axpy(alpha_lo, p, x_lo);
+      RealType f_lo = static_cast<RealType>(obj_eval(objective, x_lo));
+      if ((f_mid > f_x + c1_ * alpha_mid * gTp0) || (f_mid >= f_lo)) {
+        alpha_hi = alpha_mid;
+      } else {
+        if (fabs(gTp_mid) <= c2_ * fabs(gTp0)) {
+          return alpha_mid;
+        }
+        if (gTp_mid * (alpha_hi - alpha_lo) >= 0) {
+          alpha_hi = alpha_lo;
+        }
+        alpha_lo = alpha_mid;
+      }
+    }
+
+    return 0.5 * (alpha_lo + alpha_hi);
+  }
+};
+
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif // LINE_SEARCH_POLICIES_HPP
diff --git a/include/boost/math/optimization/detail/rdiff_optimization_policies.hpp b/include/boost/math/optimization/detail/rdiff_optimization_policies.hpp
new file mode 100644
index 0000000000..5258728bd4
--- /dev/null
+++ b/include/boost/math/optimization/detail/rdiff_optimization_policies.hpp
@@ -0,0 +1,134 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef RDIFF_OPTIMIZATION_POLICIES_HPP
+#define RDIFF_OPTIMIZATION_POLICIES_HPP
+
+#include
+#include
+#include
+#include
+namespace boost {
+namespace math {
+namespace optimization {
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+
+/******************************************************************/
+/**
+ * @brief function evaluation policy for reverse-mode autodiff
+ * @param objective objective function to evaluate
+ * @param x argument list
+ */
+template <typename RealType>
+struct reverse_mode_function_eval_policy
+{
+  template <typename Objective, typename ArgumentContainer>
+  rdiff::rvar<RealType> operator()(Objective&& objective,
+                                   ArgumentContainer& x)
+  {
+    auto& tape = rdiff::get_active_tape();
+    tape.zero_grad();
+    tape.rewind_to_last_checkpoint();
+
+    return objective(x);
+  }
+};
+/******************************************************************/
+
+/**
+ * @brief gradient evaluation policy
+ * @param obj_f objective
+ * @param x argument list
+ * @param f_eval_pol function evaluation policy; the two need to be
+ * used in tandem
+ * @param obj_v reference to the objective value stored by the optimizer
+ */
+template <typename RealType>
+struct reverse_mode_gradient_evaluation_policy
+{
+  template <typename Objective,
+            typename ArgumentContainer,
+            typename FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& g)
+  {
+    // compute objective via the eval policy, which takes care of the tape
+    rdiff::rvar<RealType> v = f_eval_pol(obj_f, x);
+    v.backward();
+    obj_v = v.item();
+    g.resize(x.size());
+
+    // copy gradients into the gradient vector
+    for (size_t i = 0; i < x.size(); ++i) {
+      g[i] = x[i].adjoint();
+    }
+  }
+};
+/******************************************************************
+ * init policies
+ */
+template <typename RealType>
+struct tape_initializer_rvar
+{
+  template <typename ArgumentContainer>
+  void operator()(ArgumentContainer& x) const noexcept
+  {
+    static_assert(std::is_same<typename ArgumentContainer::value_type,
+                               rdiff::rvar<RealType>>::value,
+                  "ArgumentContainer::value_type must be rdiff::rvar");
+    auto& tape = rdiff::get_active_tape();
+    tape.add_checkpoint();
+  }
+};
+
+template <typename RealType>
+struct random_uniform_initializer_rvar
+{
+  RealType low_, high_;
+  size_t seed_;
+  random_uniform_initializer_rvar(RealType low = 0,
+                                  RealType high = 1,
+                                  size_t seed = std::random_device{}())
+    : low_(low)
+    , high_(high)
+    , seed_(seed)
+  {
+  }
+
+  template <typename ArgumentContainer>
+  void operator()(ArgumentContainer& x) const
+  {
+    boost::random::mt19937 gen(seed_);
+    boost::random::uniform_real_distribution<RealType> dist(low_, high_);
+    for (auto& xi : x) {
+      xi = rdiff::rvar<RealType>(dist(gen));
+    }
+    auto& tape = rdiff::get_active_tape();
+    tape.add_checkpoint();
+  }
+};
+
+template <typename RealType>
+struct constant_initializer_rvar
+{
+  RealType constant;
+  explicit constant_initializer_rvar(RealType v = 0)
+    : constant(v)
+  {
+  }
+  template <typename ArgumentContainer>
+  void operator()(ArgumentContainer& x) const
+  {
+    for (auto& xi : x) {
+      xi = rdiff::rvar<RealType>(constant);
+    }
+    auto& tape = rdiff::get_active_tape();
+    tape.add_checkpoint();
+  }
+};
+} // namespace optimization
+} // namespace math
+} // namespace boost
+
+#endif
diff --git a/include/boost/math/optimization/gradient_descent.hpp b/include/boost/math/optimization/gradient_descent.hpp
new file mode 100644
index 0000000000..f88f464c8f
--- /dev/null
+++ b/include/boost/math/optimization/gradient_descent.hpp
@@ -0,0 +1,207 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef GRADIENT_DESCENT_HPP
+#define GRADIENT_DESCENT_HPP
+#include
+#include
+#include
+namespace boost {
+namespace math {
+namespace optimization {
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+
+template <typename RealType>
+struct gradient_descent_update_policy
+{
+  RealType lr_;
+  gradient_descent_update_policy(RealType lr)
+    : lr_(lr)
+  {
+  }
+
+  template <typename ArgumentType,
+            typename = typename std::enable_if<
+              !std::is_arithmetic<ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType& g)
+  {
+    // this update effectively "mutes" the tape
+    // TODO: add a tape scope guard method so that
+    // you can do math on autodiff types without
+    // accumulating gradients
+    x.get_value() -= lr_ * g;
+  }
+  template <typename ArgumentType,
+            typename std::enable_if<std::is_arithmetic<ArgumentType>::value,
+                                    int>::type = 0>
+  void operator()(ArgumentType& x, RealType& g) const
+  {
+    x -= lr_ * g;
+  }
+};
+
+/**
+ * @brief Gradient Descent Optimizer.
+ *
+ * This class implements a gradient descent optimization strategy using the
+ * policies provided for initialization, objective evaluation, and gradient
+ * computation. It inherits from `abstract_optimizer`, which provides the
+ * general optimization framework.
+ *
+ * @tparam ArgumentContainer Type of the parameter container (e.g.
+ * std::vector). + * @tparam> RealType Floating-point type + * @tparam> Objective Objective function type (functor or callable). + * @tparam> InitializationPolicy Policy controlling initialization of + * differentiable variables. + * @tparam> ObjectiveEvalPolicy Policy defining how the objective is evaluated. + * @tparam> GradEvalPolicy Policy defining how the gradient is computed. + */ + +template +class gradient_descent + : public abstract_optimizer, + gradient_descent> +{ + using base_opt = abstract_optimizer, + gradient_descent>; + +public: + using base_opt::base_opt; + /** + * @brief Perform one optimization step. + * + * defaults to a regular update inside abstract optimizer + * x -= lr * grad(f) + */ + void step() { this->step_impl(); } +}; + +/** + * @brief Create a gradient descent optimizer with default reverse-mode autodiff + policies. + * + * make_gradient_descent(objective, x) + * constructs gradient descent with objective function, and parameters x + * + * learning rate set to 0.01 by default + * + * initialization strategy : use specified, gradient tape taken care of by + * optimizer + * function eval policy : rvar function eval policy, to be used with + * boost::math::differentiation::reverse_mode::rvar + * + * gradient eval policy : rvar gradient evaluation policy + * + * gradient descent update policy : + * basically x -= lr * grad(f); + * + * make_gradient_descent(objective, x, lr) + * custom learning rate +*/ + +template +auto +make_gradient_descent(Objective&& obj, + ArgumentContainer& x, + RealType lr = RealType{ 0.01 }) +{ + return gradient_descent, + tape_initializer_rvar, + reverse_mode_function_eval_policy, + reverse_mode_gradient_evaluation_policy>( + std::forward(obj), + x, + tape_initializer_rvar{}, + reverse_mode_function_eval_policy{}, + reverse_mode_gradient_evaluation_policy{}, + gradient_descent_update_policy(lr)); +} + +template +auto +make_gradient_descent(Objective&& obj, + ArgumentContainer& x, + RealType lr, + InitializationPolicy&& ip) +{ + return gradient_descent, + InitializationPolicy, + reverse_mode_function_eval_policy, + reverse_mode_gradient_evaluation_policy>( + std::forward(obj), + x, + std::forward(ip), + reverse_mode_function_eval_policy{}, + reverse_mode_gradient_evaluation_policy{}, + gradient_descent_update_policy(lr)); +} + +template +auto +make_gradient_descent(Objective&& obj, + ArgumentContainer& x, + RealType& lr, + InitializationPolicy&& ip, + ObjectiveEvalPolicy&& oep, + GradEvalPolicy&& gep) +{ + return gradient_descent, + InitializationPolicy, + ObjectiveEvalPolicy, + GradEvalPolicy>( + std::forward(obj), + x, + std::forward(ip), + std::forward(oep), + std::forward(gep), + gradient_descent_update_policy{ lr }); +} + +} // namespace optimization +} // namespace math +} // namespace boost +#endif diff --git a/include/boost/math/optimization/gradient_optimizers.hpp b/include/boost/math/optimization/gradient_optimizers.hpp new file mode 100644 index 0000000000..53fdbd6670 --- /dev/null +++ b/include/boost/math/optimization/gradient_optimizers.hpp @@ -0,0 +1,18 @@ +// Copyright Maksym Zhelyenzyakov 2025-2026. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef GRADIENT_OPTIMIZERS_HPP
+#define GRADIENT_OPTIMIZERS_HPP
+#include
+#include
+#include
+#include
+
+namespace boost {
+namespace math {
+namespace optimization {
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif
diff --git a/include/boost/math/optimization/lbfgs.hpp b/include/boost/math/optimization/lbfgs.hpp
new file mode 100644
index 0000000000..57b70d21a2
--- /dev/null
+++ b/include/boost/math/optimization/lbfgs.hpp
@@ -0,0 +1,384 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef LBFGS_HPP
+#define LBFGS_HPP
+#include
+#include
+#include
+#include
+
+#include <boost/math/optimization/detail/line_search_policies.hpp>
+#include
+
+namespace boost {
+namespace math {
+namespace optimization {
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+
+/** @brief Helper struct for L-BFGS
+ *
+ * stores the state of the L-BFGS optimizer
+ * @param m history length (default 10, how far back to look)
+ * @param S s_k = x_k - x_{k-1} (up to m states)
+ * @param Y y_k = g_k - g_{k-1} (up to m states)
+ * @param rho 1/(y^T s)
+ * @param g_prev, x_prev previous state of argument and gradient
+ * @param f_prev previous function value
+ *
+ * https://en.wikipedia.org/wiki/Limited-memory_BFGS
+ *
+ * Jorge Nocedal and Stephen J. Wright,
+ * Numerical Optimization, 2nd Edition,
+ * Springer, 2006.
+ *
+ * pages 176-180
+ * algorithms 7.4/7.5
+ * */
+template <typename RealType>
+struct lbfgs_optimizer_state
+{
+  size_t m = 10; // default history length
+  std::deque<std::vector<RealType>> S, Y;
+  std::deque<RealType> rho;
+  std::vector<RealType> g_prev, x_prev;
+  RealType f_prev = std::numeric_limits<RealType>::quiet_NaN();
+  const RealType EPS = std::numeric_limits<RealType>::epsilon();
+
+  template <typename ArgumentContainer>
+  void update_state(ArgumentContainer& x,
+                    std::vector<RealType>& g_k,
+                    RealType fk)
+  {
+    using std::sqrt;
+
+    // iteration 0
+    if (g_prev.empty()) {
+      g_prev.assign(g_k.begin(), g_k.end());
+      x_prev.resize(x.size());
+      std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
+        return static_cast<RealType>(xi);
+      });
+      f_prev = fk;
+      return;
+    }
+
+    std::vector<RealType> s_k(x.size()), y_k(g_k.size());
+    for (size_t i = 0; i < x.size(); ++i) {
+      s_k[i] = static_cast<RealType>(x[i]) - x_prev[i];
+      y_k[i] = g_k[i] - g_prev[i];
+    }
+
+    RealType ys = dot(y_k, s_k);
+    RealType sn = sqrt(dot(s_k, s_k));
+    RealType yn = sqrt(dot(y_k, y_k));
+
+    const RealType threshold = EPS * sn * yn;
+    if (ys > threshold && ys > RealType(0)) { // accept the pair only if the
+                                              // curvature is sufficiently
+                                              // positive
+      if (S.size() == m) { // history full: drop the oldest pair
+        S.pop_front();
+        Y.pop_front();
+        rho.pop_front();
+      }
+      S.push_back(std::move(s_k));
+      Y.push_back(std::move(y_k));
+      rho.push_back(RealType(1) / ys);
+    }
+
+    g_prev.assign(g_k.begin(), g_k.end());
+    // safely cast to RealType
+    std::transform(x.begin(), x.end(), x_prev.begin(), [](const auto& xi) {
+      return static_cast<RealType>(xi);
+    });
+    f_prev = fk;
+  }
+};
+
+/** @brief helper update for L-BFGS
+ * x += alpha * search direction
+ * */
+template <typename RealType>
+struct lbfgs_update_policy
+{
+  template <typename ArgumentType,
+            typename = typename std::enable_if<
+              !std::is_arithmetic<ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType pk, RealType alpha)
+  {
+    x.get_value() += alpha * pk;
+  }
+  template <typename ArgumentType,
+            typename std::enable_if<std::is_arithmetic<ArgumentType>::value,
+                                    int>::type = 0>
+  void operator()(ArgumentType& x, RealType pk, RealType alpha)
+  {
+    x += alpha * pk;
+  }
+};
+
+/**
+ *
+ * @brief Limited-memory BFGS (L-BFGS) optimizer
+ *
+ * The `lbfgs` class implements the
Limited-memory BFGS optimization algorithm,
+ * a quasi-Newton method that approximates the inverse Hessian using a rolling
+ * window of the last `m` updates. It is suitable for medium- to large-scale
+ * optimization problems where storing the full Hessian is infeasible.
+ *
+ * @tparam ArgumentContainer container type for parameters, e.g.
+ * std::vector
+ * @tparam RealType scalar floating-point type (e.g. double, float)
+ * @tparam Objective objective function; must support "f(x)" evaluation
+ * @tparam InitializationPolicy policy for initializing x
+ * @tparam ObjectiveEvalPolicy policy for computing the objective value
+ * @tparam GradEvalPolicy policy for computing gradients
+ * @tparam LineSearchPolicy e.g. Armijo, StrongWolfe
+ *
+ * https://en.wikipedia.org/wiki/Limited-memory_BFGS
+ */
+
+template <typename ArgumentContainer,
+          typename RealType,
+          typename Objective,
+          typename InitializationPolicy,
+          typename ObjectiveEvalPolicy,
+          typename GradEvalPolicy,
+          typename LineSearchPolicy>
+class lbfgs
+  : public abstract_optimizer<ArgumentContainer,
+                              RealType,
+                              Objective,
+                              InitializationPolicy,
+                              ObjectiveEvalPolicy,
+                              GradEvalPolicy,
+                              lbfgs_update_policy<RealType>,
+                              lbfgs<ArgumentContainer,
+                                    RealType,
+                                    Objective,
+                                    InitializationPolicy,
+                                    ObjectiveEvalPolicy,
+                                    GradEvalPolicy,
+                                    LineSearchPolicy>>
+{
+  using base_opt = abstract_optimizer<ArgumentContainer,
+                                      RealType,
+                                      Objective,
+                                      InitializationPolicy,
+                                      ObjectiveEvalPolicy,
+                                      GradEvalPolicy,
+                                      lbfgs_update_policy<RealType>,
+                                      lbfgs<ArgumentContainer,
+                                            RealType,
+                                            Objective,
+                                            InitializationPolicy,
+                                            ObjectiveEvalPolicy,
+                                            GradEvalPolicy,
+                                            LineSearchPolicy>>;
+
+  const RealType EPS = std::numeric_limits<RealType>::epsilon();
+  lbfgs_optimizer_state<RealType> state_;
+  LineSearchPolicy line_search_;
+
+  // two-loop recursion (Nocedal & Wright, Algorithm 7.4)
+  std::vector<RealType> compute_direction(const std::vector<RealType>& gk)
+  {
+    const size_t n = gk.size();
+    const size_t L = state_.S.size(); // history length so far (grows until
+                                      // the window of m pairs fills up)
+
+    if (L == 0) {
+      // no curvature pairs yet: fall back to steepest descent
+      std::vector<RealType> p(n);
+      std::transform(
+        gk.begin(), gk.end(), p.begin(), [](RealType gi) { return -gi; });
+      return p;
+    }
+
+    std::vector<RealType> q = gk;
+    std::vector<RealType> alpha(L, RealType(0));
+    for (size_t t = 0; t < L; ++t) {
+      const std::size_t i = L - 1 - t; // newest first
+      const RealType sTq = dot(state_.S[i], q);
+      alpha[i] = state_.rho[i] * sTq;
+      axpy(-alpha[i], state_.Y[i], q);
+    }
+
+    const RealType sTy = dot(state_.S.back(), state_.Y.back());
+    const RealType yTy = dot(state_.Y.back(), state_.Y.back());
+    const RealType gamma = (yTy > RealType(0)) ?
(sTy / yTy) : RealType(1); + + std::vector r = q; + scale(r, gamma); + + for (std::size_t i = 0; i < L; ++i) { + const RealType yTr = dot(state_.Y[i], r); + const RealType beta = state_.rho[i] * yTr; + axpy(alpha[i] - beta, state_.S[i], r); + } + scale(r, RealType{ -1 }); + return r; + } + +public: + using base_opt::base_opt; + lbfgs(Objective&& objective, + ArgumentContainer& x, + size_t m, + InitializationPolicy&& ip, + ObjectiveEvalPolicy&& oep, + GradEvalPolicy&& gep, + lbfgs_update_policy&& up, + LineSearchPolicy&& lsp) + : base_opt(std::forward(objective), + x, + std::forward(ip), + std::forward(oep), + std::forward(gep), + std::forward>(up)) + , line_search_(lsp) + { + state_.m = m; + + state_.S.clear(); + state_.Y.clear(); + state_.rho.clear(); + state_.g_prev.clear(); + state_.f_prev = std::numeric_limits::quiet_NaN(); + } + + void step() + { + auto& x = this->arguments(); + auto& g = this->gradients(); + auto& obj = this->objective_value(); + auto& obj_eval = this->obj_eval_; + auto& grad_eval = this->grad_eval_; + auto& objective = this->objective_; + auto& update = this->update_; + + grad_eval(objective, x, obj_eval, obj, g); + state_.update_state(x, g, obj); + std::vector p = compute_direction(g); + RealType alpha = line_search_(objective, obj_eval, grad_eval, x, g, p, obj); + for (size_t i = 0; i < x.size(); ++i) { + update(x[i], p[i], alpha); + } + } +}; + +template +auto +make_lbfgs(Objective&& obj, ArgumentContainer& x, std::size_t m = 10) +{ + using RealType = typename argument_container_t::type; + return lbfgs, + tape_initializer_rvar, + reverse_mode_function_eval_policy, + reverse_mode_gradient_evaluation_policy, + strong_wolfe_line_search_policy>( + std::forward(obj), + x, + m, + tape_initializer_rvar{}, + reverse_mode_function_eval_policy{}, + reverse_mode_gradient_evaluation_policy{}, + lbfgs_update_policy{}, + strong_wolfe_line_search_policy{}); +} + +template +auto +make_lbfgs(Objective&& obj, + ArgumentContainer& x, + std::size_t m, + InitializationPolicy&& ip) +{ + using RealType = typename argument_container_t::type; + + return lbfgs, + InitializationPolicy, + reverse_mode_function_eval_policy, + reverse_mode_gradient_evaluation_policy, + strong_wolfe_line_search_policy>( + std::forward(obj), + x, + m, + std::forward(ip), + reverse_mode_function_eval_policy{}, + reverse_mode_gradient_evaluation_policy{}, + lbfgs_update_policy{}, + strong_wolfe_line_search_policy{}); +} + +template +auto +make_lbfgs(Objective&& obj, + ArgumentContainer& x, + std::size_t m, + InitializationPolicy&& ip, + LineSearchPolicy&& lsp) +{ + using RealType = typename argument_container_t::type; + + return lbfgs, + InitializationPolicy, + reverse_mode_function_eval_policy, + reverse_mode_gradient_evaluation_policy, + LineSearchPolicy>( + std::forward(obj), + x, + m, + std::forward(ip), + reverse_mode_function_eval_policy{}, + reverse_mode_gradient_evaluation_policy{}, + lbfgs_update_policy{}, + std::forward(lsp)); +} + +template +auto +make_lbfgs(Objective&& obj, + ArgumentContainer& x, + std::size_t m, + InitializationPolicy&& ip, + FunctionEvalPolicy&& fep, + GradientEvalPolicy&& gep, + LineSearchPolicy&& lsp) +{ + using RealType = typename argument_container_t::type; + return lbfgs, + InitializationPolicy, + FunctionEvalPolicy, + GradientEvalPolicy, + LineSearchPolicy>(std::forward(obj), + x, + m, + std::forward(ip), + std::forward(fep), + std::forward(gep), + lbfgs_update_policy{}, + std::forward(lsp)); +} + +} // namespace optimization +} // namespace math +} // namespace boost +#endif 
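A minimal end-to-end sketch of how the pieces above compose (not part of the patch itself): it assumes rvar<double> is the reverse-mode type from autodiff_reverse.hpp, that mixed double/rvar arithmetic is available, and it relies on the documented defaults of make_lbfgs and the minimize driver defined in minimizer.hpp below (strong-Wolfe line search, gradient-norm convergence at 1e-8, at most 10000 iterations). The Rosenbrock objective and starting point are illustrative only.

#include <boost/math/optimization/lbfgs.hpp>
#include <boost/math/optimization/minimizer.hpp>
#include <iostream>
#include <vector>

namespace opt = boost::math::optimization;
namespace rdiff = boost::math::differentiation::reverse_mode;

int main()
{
    using rv = rdiff::rvar<double>; // assumed spelling of the reverse-mode type

    // tape_initializer_rvar only checkpoints the tape; it does not assign
    // values, so the starting point is set explicitly here
    std::vector<rv> x(2);
    x[0] = rv(-1.2);
    x[1] = rv(1.0);

    // 2-D Rosenbrock: minimum at (1, 1)
    auto rosenbrock = [](std::vector<rv>& v) {
        rv a = 1.0 - v[0];
        rv b = v[1] - v[0] * v[0];
        return a * a + 100.0 * b * b;
    };

    auto solver = opt::make_lbfgs(rosenbrock, x, 10); // keep m = 10 pairs
    auto result = opt::minimize(solver); // unconstrained, default policies
    std::cout << result;                 // prints the optimization_result
}
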
diff --git a/include/boost/math/optimization/minimizer.hpp b/include/boost/math/optimization/minimizer.hpp new file mode 100644 index 0000000000..1b3e5b9aeb --- /dev/null +++ b/include/boost/math/optimization/minimizer.hpp @@ -0,0 +1,335 @@ +// Copyright Maksym Zhelyenzyakov 2025-2026. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// https://www.boost.org/LICENSE_1_0.txt) +#ifndef MINIMIZER_HPP +#define MINIMIZER_HPP +#include +#include +#include +namespace boost { +namespace math { +namespace optimization { +template +struct optimization_result +{ + size_t num_iter = 0; + RealType objective_value; + std::vector objective_history; + bool converged; +}; + +template +std::ostream& +operator<<(std::ostream& os, const optimization_result& r) +{ + os << "optimization_result {\n" + << " num_iter = " << r.num_iter << "\n" + << " objective_value = " << r.objective_value << "\n" + << " converged = " << std::boolalpha << r.converged << "\n" + << " objective_history = ["; + + for (std::size_t i = 0; i < r.objective_history.size(); ++i) { + os << r.objective_history[i]; + if (i + 1 < r.objective_history.size()) { + os << ", "; + } + } + os << "]\n}\n"; + return os; +} +/*****************************************************************************************/ +template +struct gradient_norm_convergence_policy +{ + RealType tol_; + explicit gradient_norm_convergence_policy(RealType tol) + : tol_(tol) + { + } + + template + bool operator()(const GradientContainer& g, RealType /*objective_v*/) const + { + return norm_2(g) < tol_; + } +}; + +template +struct objective_tol_convergence_policy +{ + RealType tol_; + mutable RealType last_value_; + mutable bool first_call_; + + explicit objective_tol_convergence_policy(RealType tol) + : tol_(tol) + , last_value_(0) + , first_call_(true) + { + } + + template + bool operator()(const GradientContainer&, RealType objective_v) const + { + if (first_call_) { + last_value_ = objective_v; + first_call_ = false; + return false; + } + RealType diff = abs(objective_v - last_value_); + last_value_ = objective_v; + return diff < tol_; + } +}; + +template +struct relative_objective_tol_policy +{ + RealType rel_tol_; + mutable RealType last_value_; + mutable bool first_call_; + + explicit relative_objective_tol_policy(RealType rel_tol) + : rel_tol_(rel_tol) + , last_value_(0) + , first_call_(true) + { + } + + template + bool operator()(const GradientContainer&, RealType objective_v) const + { + if (first_call_) { + last_value_ = objective_v; + first_call_ = false; + return false; + } + RealType denom = max(1, abs(last_value_)); + RealType rel_diff = abs(objective_v - last_value_) / denom; + last_value_ = objective_v; + return rel_diff < rel_tol_; + } +}; + +template +struct combined_convergence_policy +{ + Policy1 p1_; + Policy2 p2_; + + combined_convergence_policy(Policy1 p1, Policy2 p2) + : p1_(p1) + , p2_(p2) + { + } + + template + bool operator()(const GradientContainer& g, RealType obj) const + { + return p1_(g, obj) || p2_(g, obj); + } +}; + +/*****************************************************************************************/ +struct max_iter_termination_policy +{ + size_t max_iter_; + max_iter_termination_policy(size_t max_iter) + : max_iter_(max_iter) {}; + bool operator()(size_t iter) + { + if (iter < max_iter_) { + return false; + } + return true; + } +}; + +struct wallclock_termination_policy +{ + std::chrono::steady_clock::time_point start_; + std::chrono::milliseconds max_time_; + + 
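+  // note: start_ is captured when the policy is constructed, so the
+  // wall-clock budget includes any time spent before the first step()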
explicit wallclock_termination_policy(std::chrono::milliseconds max_time) + : start_(std::chrono::steady_clock::now()) + , max_time_(max_time) + { + } + + bool operator()(size_t /*iter*/) const + { + return std::chrono::steady_clock::now() - start_ > max_time_; + } +}; + +/*****************************************************************************************/ +template +struct unconstrained_policy +{ + void operator()(ArgumentContainer&) {} +}; + +template +struct box_constraints +{ + RealType min_, max_; + box_constraints(RealType min, RealType max) + : min_(min) + , max_(max) {}; + void operator()(ArgumentContainer& x) + { + for (auto& xi : x) { + xi = (xi < min_) ? min_ : (max_ < xi) ? max_ : xi; + } + } +}; + +template +struct nonnegativity_constraint +{ + void operator()(ArgumentContainer& x) const + { + for (auto& xi : x) { + if (xi < RealType{ 0 }) + xi = RealType{ 0 }; + } + } +}; +template +struct l2_ball_constraint +{ + RealType radius_; + + explicit l2_ball_constraint(RealType radius) + : radius_(radius) + { + } + + void operator()(ArgumentContainer& x) const + { + RealType norm2v = norm_2(x); + if (norm2v > radius_) { + RealType scale = radius_ / norm2v; + for (auto& xi : x) + xi *= scale; + } + } +}; + +template +struct l1_ball_constraint +{ + RealType radius_; + + explicit l1_ball_constraint(RealType radius) + : radius_(radius) + { + } + + void operator()(ArgumentContainer& x) const + { + RealType norm1v = norm_1(x); + + if (norm1v > radius_) { + RealType scale = radius_ / norm1v; + for (auto& xi : x) + xi *= scale; + } + } +}; +template +struct simplex_constraint +{ + void operator()(ArgumentContainer& x) const + { + RealType sum = RealType{ 0 }; + for (auto& xi : x) { + if (xi < RealType{ 0 }) + xi = RealType{ 0 }; // clip negatives + sum += xi; + } + if (sum > RealType{ 0 }) { + for (auto& xi : x) + xi /= sum; + } + } +}; + +template +struct function_constraint +{ + using func_t = void (*)(ArgumentContainer&); + + func_t f_; + + explicit function_constraint(func_t f) + : f_(f) + { + } + + void operator()(ArgumentContainer& x) const { f_(x); } +}; +template +struct unit_sphere_constraint +{ + void operator()(ArgumentContainer& x) const + { + RealType norm = norm_2(x); + if (norm > RealType{ 0 }) { + for (auto& xi : x) + xi /= norm; + } + } +}; +/*****************************************************************************************/ + +template +auto +minimize_impl(Optimizer& opt, + ConstraintPolicy project, + ConvergencePolicy converged, + TerminationPolicy terminate, + bool history) +{ + optimization_result result; + size_t iter = 0; + do { + opt.step(); + project(opt.arguments()); + ++iter; + if (history) { + result.objective_history.push_back(opt.objective_value()); + } + + } while (!converged(opt.gradients(), opt.objective_value()) && + !terminate(iter)); + result.num_iter = iter; + result.objective_value = opt.objective_value(); + result.converged = converged(opt.gradients(), opt.objective_value()); + return result; +} +template, + class ConvergencePolicy = + gradient_norm_convergence_policy, + class TerminationPolicy = max_iter_termination_policy> +auto +minimize(Optimizer& opt, + ConstraintPolicy project = ConstraintPolicy{}, + ConvergencePolicy converged = + ConvergencePolicy{ + static_cast(1e-8) }, + TerminationPolicy terminate = TerminationPolicy(10000), + bool history = true) +{ + return minimize_impl(opt, project, converged, terminate, history); +} +} // namespace optimization +} // namespace math +} // namespace boost +#endif diff --git 
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif
diff --git a/include/boost/math/optimization/nesterov.hpp b/include/boost/math/optimization/nesterov.hpp
new file mode 100644
index 0000000000..efd13a5029
--- /dev/null
+++ b/include/boost/math/optimization/nesterov.hpp
@@ -0,0 +1,208 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#ifndef NESTEROV_HPP
+#define NESTEROV_HPP
+#include <boost/math/differentiation/autodiff_reverse.hpp>
+#include <boost/math/optimization/abstract_optimizer.hpp>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace boost {
+namespace math {
+namespace optimization {
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+
+/**
+ * @brief Update rule for Nesterov accelerated gradient descent.
+ */
+template <typename RealType>
+struct nesterov_update_policy
+{
+  RealType lr_, mu_;
+  nesterov_update_policy(RealType lr, RealType mu)
+    : lr_(lr)
+    , mu_(mu)
+  {
+  }
+
+  template <typename ArgumentType,
+            typename = typename std::enable_if<
+              rdiff::is_rvar<ArgumentType>::value>::type>
+  void operator()(ArgumentType& x, RealType& g, RealType& v)
+  {
+    RealType v_prev = v;
+    v = mu_ * v - lr_ * g;
+    x.get_value() += -mu_ * v_prev + (static_cast<RealType>(1) + mu_) * v;
+  }
+  template <typename ArgumentType,
+            typename std::enable_if<!rdiff::is_rvar<ArgumentType>::value,
+                                    int>::type = 0>
+  void operator()(ArgumentType& x, RealType& g, RealType& v) const
+  {
+    const RealType v_prev = v;
+    v = mu_ * v - lr_ * g;
+    x += -mu_ * v_prev + (static_cast<RealType>(1) + mu_) * v;
+  }
+  RealType lr() const noexcept { return lr_; }
+  RealType mu() const noexcept { return mu_; }
+};
+
+/**
+ * @brief Nesterov accelerated gradient (NAG) optimizer.
+ *
+ * https://jlmelville.github.io/mize/nesterov.html
+ */
+template <typename ArgumentContainer,
+          typename RealType,
+          typename Objective,
+          typename InitializationPolicy,
+          typename ObjectiveEvalPolicy,
+          typename GradEvalPolicy>
+class nesterov_accelerated_gradient
+  : public abstract_optimizer<
+      ArgumentContainer,
+      RealType,
+      Objective,
+      InitializationPolicy,
+      ObjectiveEvalPolicy,
+      GradEvalPolicy,
+      nesterov_update_policy<RealType>,
+      nesterov_accelerated_gradient<ArgumentContainer,
+                                    RealType,
+                                    Objective,
+                                    InitializationPolicy,
+                                    ObjectiveEvalPolicy,
+                                    GradEvalPolicy>>
+{
+  using base_opt =
+    abstract_optimizer<ArgumentContainer,
+                       RealType,
+                       Objective,
+                       InitializationPolicy,
+                       ObjectiveEvalPolicy,
+                       GradEvalPolicy,
+                       nesterov_update_policy<RealType>,
+                       nesterov_accelerated_gradient<ArgumentContainer,
+                                                     RealType,
+                                                     Objective,
+                                                     InitializationPolicy,
+                                                     ObjectiveEvalPolicy,
+                                                     GradEvalPolicy>>;
+  std::vector<RealType> v_;
+
+public:
+  using base_opt::base_opt;
+  nesterov_accelerated_gradient(Objective&& objective,
+                                ArgumentContainer& x,
+                                InitializationPolicy&& ip,
+                                ObjectiveEvalPolicy&& oep,
+                                GradEvalPolicy&& gep,
+                                nesterov_update_policy<RealType>&& up)
+    : base_opt(std::forward<Objective>(objective),
+               x,
+               std::forward<InitializationPolicy>(ip),
+               std::forward<ObjectiveEvalPolicy>(oep),
+               std::forward<GradEvalPolicy>(gep),
+               std::forward<nesterov_update_policy<RealType>>(up))
+    , v_(x.size(), RealType(0))
+  {
+  }
+
+  void step()
+  {
+    auto& x = this->arguments();
+    auto& g = this->gradients();
+    auto& obj = this->objective_value();
+    auto& obj_eval = this->obj_eval_;
+    auto& grad_eval = this->grad_eval_;
+    auto& objective = this->objective_;
+    auto& update = this->update_;
+
+    grad_eval(objective, x, obj_eval, obj, g);
+
+    for (size_t i = 0; i < x.size(); ++i) {
+      update(x[i], g[i], v_[i]);
+    }
+  }
+};
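+
+// The update_ call above applies the usual "momentum correction" form of NAG
+// (see the link in the class comment): with velocity v and momentum mu,
+//
+//   v_new = mu * v - lr * g
+//   x    += -mu * v + (1 + mu) * v_new
+//
+// which is algebraically equivalent to evaluating the gradient at the
+// look-ahead point x + mu * v. Minimal usage sketch with the factories below
+// (`objective` is a placeholder):
+//
+//   std::vector<rdiff::rvar<double>> x = { 3.0, -4.0 };
+//   auto nag = make_nag(&objective, x, 0.001, 0.9);
+//   nag.step();
+//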
+template <typename ArgumentContainer, typename RealType, typename Objective>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr = RealType{ 0.01 },
+         RealType mu = RealType{ 0.95 })
+{
+  return nesterov_accelerated_gradient<
+    ArgumentContainer,
+    RealType,
+    std::decay_t<Objective>,
+    tape_initializer_rvar<RealType>,
+    reverse_mode_function_eval_policy<RealType>,
+    reverse_mode_gradient_evaluation_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    tape_initializer_rvar<RealType>{},
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    nesterov_update_policy<RealType>(lr, mu));
+}
+template <typename ArgumentContainer,
+          typename RealType,
+          typename Objective,
+          typename InitializationPolicy>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr,
+         RealType mu,
+         InitializationPolicy&& ip)
+{
+  return nesterov_accelerated_gradient<
+    ArgumentContainer,
+    RealType,
+    std::decay_t<Objective>,
+    InitializationPolicy,
+    reverse_mode_function_eval_policy<RealType>,
+    reverse_mode_gradient_evaluation_policy<RealType>>(
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    reverse_mode_function_eval_policy<RealType>{},
+    reverse_mode_gradient_evaluation_policy<RealType>{},
+    nesterov_update_policy<RealType>(lr, mu));
+}
+template <typename ArgumentContainer,
+          typename RealType,
+          typename Objective,
+          typename InitializationPolicy,
+          typename ObjectiveEvalPolicy,
+          typename GradEvalPolicy>
+auto
+make_nag(Objective&& obj,
+         ArgumentContainer& x,
+         RealType lr,
+         RealType mu,
+         InitializationPolicy&& ip,
+         ObjectiveEvalPolicy&& oep,
+         GradEvalPolicy&& gep)
+{
+  return nesterov_accelerated_gradient<ArgumentContainer,
+                                       RealType,
+                                       std::decay_t<Objective>,
+                                       InitializationPolicy,
+                                       ObjectiveEvalPolicy,
+                                       GradEvalPolicy>(
+    std::forward<Objective>(obj),
+    x,
+    std::forward<InitializationPolicy>(ip),
+    std::forward<ObjectiveEvalPolicy>(oep),
+    std::forward<GradEvalPolicy>(gep),
+    nesterov_update_policy<RealType>{ lr, mu });
+}
+} // namespace optimization
+} // namespace math
+} // namespace boost
+#endif
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index 279d547599..c5bde01635 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -1368,6 +1368,13 @@ test-suite test_reverse_mode_autodiff
     [ run test_reverse_mode_autodiff_basic_math_ops.cpp /boost/test//boost_unit_test_framework : : : <toolset>gcc-mingw:<asmflags>-Wa,-mbig-obj <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj <variant>release ]
     [ run test_reverse_mode_autodiff_error_functions.cpp /boost/test//boost_unit_test_framework : : : <toolset>gcc-mingw:<asmflags>-Wa,-mbig-obj <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj <variant>release ]
     ;
+
+test-suite gradient_based_optimizers
+    :
+    [ run test_gradient_descent_optimizer.cpp /boost/test//boost_unit_test_framework : : : <toolset>gcc-mingw:<asmflags>-Wa,-mbig-obj <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj <variant>release ]
+    [ run test_nesterov_optimizer.cpp /boost/test//boost_unit_test_framework : : : <toolset>gcc-mingw:<asmflags>-Wa,-mbig-obj <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj <variant>release ]
+    [ run test_lbfgs.cpp /boost/test//boost_unit_test_framework : : : <toolset>gcc-mingw:<asmflags>-Wa,-mbig-obj <debug-symbols>off <toolset>msvc:<cxxflags>/bigobj <variant>release ]
+    ;
 # BEGIN AUTODIFF LONG RUNNING TESTS
 test-suite autodiff-long-running-tests
 :
@@ -2355,6 +2362,6 @@ explicit no_eh_tests ;
 # Some aliases which group blocks of tests for CI testing:
 alias github_ci_block_1 : special_fun float128_tests distribution_tests mp misc concepts ;
-alias github_ci_block_2 : quadrature interpolators autodiff test_reverse_mode_autodiff ../example//examples ../tools ;
+alias github_ci_block_2 : quadrature interpolators autodiff test_reverse_mode_autodiff gradient_based_optimizers ../example//examples ../tools ;
 explicit github_ci_block_1 ;
 explicit github_ci_block_2 ;
diff --git a/test/test_functions_for_optimization.hpp b/test/test_functions_for_optimization.hpp
index b091984f8a..da989fc09e 100644
--- a/test/test_functions_for_optimization.hpp
+++ b/test/test_functions_for_optimization.hpp
@@ -12,13 +12,35 @@
 #include <array>
 #include <vector>
+/* simple n-d quadratic function */
+template <typename RealType>
+RealType
+quadratic(std::vector<RealType>& x)
+{
+  RealType res{ 0.0 };
+  for (auto& item : x) {
+    res += item * item;
+  }
+  return res;
+}
+
+template <typename RealType>
+RealType
+quadratic_high_cond_2D(std::vector<RealType>& x)
+{
+  return 1000 * x[0] * x[0] + x[1] * x[1];
+}
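+
+// quadratic_high_cond_2D has Hessian diag(2000, 2), i.e. a condition number
+// of 1000; plain gradient descent is only stable on it for lr < 2/2000 = 1e-3,
+// which is what makes it a useful stress test for momentum methods.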
+
 // Taken from: https://en.wikipedia.org/wiki/Test_functions_for_optimization
-template <class Real> Real ackley(std::array<Real, 2> const &v) {
-  using std::sqrt;
+template <class Real>
+Real
+ackley(std::array<Real, 2> const& v)
+{
+  using boost::math::constants::e;
+  using boost::math::constants::two_pi;
   using std::cos;
   using std::exp;
-  using boost::math::constants::two_pi;
-  using boost::math::constants::e;
+  using std::sqrt;
   Real x = v[0];
   Real y = v[1];
   Real arg1 = -sqrt((x * x + y * y) / 2) / 5;
@@ -26,16 +48,21 @@
   return -20 * exp(arg1) - exp(arg2 / 2) + 20 + e<Real>();
 }
 
-template <class Real> auto rosenbrock_saddle(std::array<Real, 2> const &v) -> Real {
-  Real x { v[0] };
-  Real y { v[1] };
+template <class Real>
+auto
+rosenbrock_saddle(std::array<Real, 2> const& v) -> Real
+{
+  Real x{ v[0] };
+  Real y{ v[1] };
   return static_cast<Real>(100 * (x * x - y) * (x * x - y) + (1 - x) * (1 - x));
 }
-
-template <class Real> Real rastrigin(std::vector<Real> const &v) {
-  using std::cos;
+template <class Real>
+Real
+rastrigin(std::vector<Real> const& v)
+{
   using boost::math::constants::two_pi;
+  using std::cos;
   auto A = static_cast<Real>(10);
   auto y = static_cast<Real>(10 * v.size());
   for (auto x : v) {
@@ -46,7 +73,9 @@ template <class Real> Real rastrigin(std::vector<Real> const &v) {
 // Useful for testing return-type != scalar argument type,
 // and robustness to NaNs:
-double sphere(std::vector<float> const &v) {
+double
+sphere(std::vector<float> const& v)
+{
   double r = 0.0;
   for (auto x : v) {
     double x_ = static_cast<double>(x);
@@ -59,23 +88,27 @@ double sphere(std::vector<float> const &v) {
 }
 
 template <class Real>
-Real three_hump_camel(std::array<Real, 2> const & v) {
+Real
+three_hump_camel(std::array<Real, 2> const& v)
+{
   Real x = v[0];
   Real y = v[1];
-  auto xsq = x*x;
-  return 2*xsq - (1 + Real(1)/Real(20))*xsq*xsq + xsq*xsq*xsq/6 + x*y + y*y;
+  auto xsq = x * x;
+  return 2 * xsq - (1 + Real(1) / Real(20)) * xsq * xsq + xsq * xsq * xsq / 6 +
+         x * y + y * y;
 }
 
 // Minima occurs at (3, 1/2) with value 0:
 template <class Real>
-Real beale(std::array<Real, 2> const & v) {
+Real
+beale(std::array<Real, 2> const& v)
+{
   Real x = v[0];
   Real y = v[1];
-  Real t1 = Real(3)/Real(2) -x + x*y;
-  Real t2 = Real(9)/Real(4) -x + x*y*y;
-  Real t3 = Real(21)/Real(8) -x + x*y*y*y;
-  return t1*t1 + t2*t2 + t3*t3;
+  Real t1 = Real(3) / Real(2) - x + x * y;
+  Real t2 = Real(9) / Real(4) - x + x * y * y;
+  Real t3 = Real(21) / Real(8) - x + x * y * y * y;
+  return t1 * t1 + t2 * t2 + t3 * t3;
 }
-
 #endif
diff --git a/test/test_gradient_descent_optimizer.cpp b/test/test_gradient_descent_optimizer.cpp
new file mode 100644
index 0000000000..bc9e029f0b
--- /dev/null
+++ b/test/test_gradient_descent_optimizer.cpp
@@ -0,0 +1,339 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#include "test_autodiff_reverse.hpp" // reuse for some basic options
+#include "test_functions_for_optimization.hpp"
+#include <array>
+#include <boost/math/optimization/gradient_descent.hpp>
+#include <boost/math/optimization/minimizer.hpp>
+#include <limits>
+#include <vector>
+namespace rdiff = boost::math::differentiation::reverse_mode;
+namespace bopt = boost::math::optimization;
+BOOST_AUTO_TEST_SUITE(basic_gradient_descent)
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(default_gd_test, T, all_float_types)
+{
+  size_t NITER = 2000;
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), T(100) };
+  std::vector<rdiff::rvar<T>> x_ad;
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x_ad.push_back(rng.next());
+  }
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>, x_ad, lr);
+  for (size_t i = 0; i < NITER; ++i) {
+    gdopt.step();
+  }
+  for (auto& x : x_ad) {
+    BOOST_REQUIRE_SMALL(x.item(), eps);
+  }
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(test_minimize, T, all_float_types)
+{
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), T(100) };
+  std::vector<rdiff::rvar<T>> x_ad;
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x_ad.push_back(rng.next());
+  }
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>, x_ad, lr);
+  auto z = minimize(gdopt);
+  for (auto& x : x_ad) {
+    BOOST_REQUIRE_SMALL(x.item(), eps);
+  }
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(random_initializer_test, T, all_float_types)
+{
+  size_t N = 10;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T>> x(N);
+
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>,
+                                x,
+                                lr,
+                                bopt::random_uniform_initializer_rvar<T>(
+                                  -2.0, 2.0, 1234)); // uniform in [-2, 2]
+  for (auto& xi : x) {
+    T v = xi.item();
+    BOOST_TEST(v >= -2);
+    BOOST_TEST(v <= 2);
+  }
+  gdopt.step();
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(const_initializer_test, T, all_float_types)
+{
+  size_t N = 10;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T>> x(N);
+
+  auto gdopt = bopt::make_gradient_descent(
+    &quadratic<rdiff::rvar<T>>,
+    x,
+    lr,
+    bopt::costant_initializer_rvar<T>(T{ 5.0 })); // all initialized to 5
+
+  for (auto& xi : x) {
+    T v = xi.item();
+    BOOST_REQUIRE_CLOSE(v, T{ 5.0 }, T{ 1e-3 });
+  }
+  gdopt.step();
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(box_constraint_test, T, all_float_types)
+{
+  size_t N = 5;
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T>> x(N, T{ 10 });
+
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>, x, lr);
+
+  auto res = bopt::minimize(
+    gdopt, bopt::box_constraints<std::vector<rdiff::rvar<T>>, T>(-1.0, 1.0));
+
+  for (auto& xi : x) {
+    BOOST_TEST(xi.item() >= -1.0);
+    BOOST_TEST(xi.item() <= 1.0);
+  }
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(max_iter_test, T, all_float_types)
+{
+  T lr = T{ 1e-6 }; // very slow learning
+  std::vector<rdiff::rvar<T>> x = { T{ 5 }, T{ 5 } };
+
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>, x, lr);
+
+  size_t max_iter = 50;
+  auto res =
+    bopt::minimize(gdopt,
+                   bopt::unconstrained_policy<std::vector<rdiff::rvar<T>>>{},
+                   bopt::gradient_norm_convergence_policy<T>(T{ 1e-20 }),
+                   bopt::max_iter_termination_policy(max_iter));
+
+  BOOST_TEST(!res.converged); // should not converge with tiny lr
+  BOOST_REQUIRE_EQUAL(res.num_iter, max_iter);
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(history_tracking_test, T, all_float_types)
+{
+  T lr = T{ 1e-2 };
+  std::vector<rdiff::rvar<T>> x = { T{ 3 }, T{ -4 }, T{ 5 } };
+
+  auto gdopt =
+    bopt::make_gradient_descent(&quadratic<rdiff::rvar<T>>, x, lr);
+
+  auto res =
+    bopt::minimize(gdopt,
+                   bopt::unconstrained_policy<std::vector<rdiff::rvar<T>>>{},
+                   bopt::gradient_norm_convergence_policy<T>(T{ 1e-6 }),
+                   bopt::max_iter_termination_policy(1000),
+                   true); // enable history
+
+  BOOST_TEST(!res.objective_history.empty());
+  BOOST_TEST(res.objective_history.front() > res.objective_history.back());
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(rosenbrock_test, T, all_float_types)
+{
+  std::array<rdiff::rvar<T>, 2> x = { T{ -1.2 }, T{ 1.0 } }; // bad start
+  T lr = T{ 1e-3 };
+
+  auto gdopt =
+    bopt::make_gradient_descent(&rosenbrock_saddle<rdiff::rvar<T>>, x, lr);
+
+  auto res = bopt::minimize(
+    gdopt,
+    bopt::unconstrained_policy<std::array<rdiff::rvar<T>, 2>>{},
+    bopt::gradient_norm_convergence_policy<T>(T{ 1e-4 }),
+    bopt::max_iter_termination_policy(50000));
+  BOOST_TEST(res.converged);
+  BOOST_REQUIRE_CLOSE(x[0].item(), T{ 1.0 }, T{ 1e-1 });
+  BOOST_REQUIRE_CLOSE(x[1].item(), T{ 1.0 }, T{ 1e-1 });
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(objective_tol_convergence_test,
+                              T,
+                              all_float_types)
+{
+  using policy_t = bopt::objective_tol_convergence_policy<T>;
+  policy_t pol(1e-3);
+  std::vector<T> dummy_grad;
+
+  BOOST_TEST(!pol(dummy_grad, T{ 100.0 }));
+  BOOST_TEST(!pol(dummy_grad, T{ 99.0 }));
+  BOOST_TEST(pol(dummy_grad, T{ 99.0005 }));
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(relative_objective_tol_test, T, all_float_types)
+{
+  using policy_t = bopt::relative_objective_tol_policy<T>;
+  policy_t pol(1e-3);
+
+  std::vector<T> dummy_grad;
+  BOOST_TEST(!pol(dummy_grad, T{ 1000.0 }));
+  BOOST_TEST(!pol(dummy_grad, T{ 1010.0 }));
+  BOOST_TEST(pol(dummy_grad, T{ 1010.5 }));
+}
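+
+// combined_convergence_policy ORs its members: in the test below the step
+// from 110.0 to 110.1 trips the relative policy (0.1 / max(1, 110) ~ 9.1e-4
+// < 1e-3), and the final step of 1e-7 trips the absolute policy
+// (1e-7 < 1e-6); the earlier steps satisfy neither.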
+BOOST_AUTO_TEST_CASE_TEMPLATE(combined_policy_test, T, all_float_types)
+{
+  using pol_abs = bopt::objective_tol_convergence_policy<T>;
+  using pol_rel = bopt::relative_objective_tol_policy<T>;
+  using pol_comb = bopt::combined_convergence_policy<pol_abs, pol_rel>;
+
+  pol_abs abs_pol(1e-6);
+  pol_rel rel_pol(1e-3);
+  pol_comb comb(abs_pol, rel_pol);
+
+  std::vector<T> dummy_grad;
+
+  BOOST_TEST(!comb(dummy_grad, T{ 100.0 }));
+  BOOST_TEST(!comb(dummy_grad, T{ 110.0 }));
+  BOOST_TEST(comb(dummy_grad, T{ 110.1 }));
+  BOOST_TEST(comb(dummy_grad, T{ 110.1000001 }));
+}
+BOOST_AUTO_TEST_CASE_TEMPLATE(nonnegativity_constraint_test, T, all_float_types)
+{
+  std::vector<T> x = { T{ 1.0 }, T{ -2.0 }, T{ 3.0 }, T{ -4.0 } };
+
+  bopt::nonnegativity_constraint<std::vector<T>, T> proj;
+  proj(x);
+
+  for (auto& xi : x)
+    BOOST_TEST(xi >= 0.0);
+  BOOST_TEST(x == std::vector<T>({ T{ 1.0 }, T{ 0.0 }, T{ 3.0 }, T{ 0.0 } }));
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(l2_ball_constraint_test, T, all_float_types)
+{
+  using std::abs;
+  using std::sqrt;
+  std::vector<T> x = { T{ 3.0 }, T{ 4.0 } }; // norm = 5
+  bopt::l2_ball_constraint<std::vector<T>, T> proj(1.0);
+  proj(x);
+
+  // tolerance scaled to the working precision so float passes too
+  const T tol = 100 * std::numeric_limits<T>::epsilon();
+  T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+  BOOST_TEST(abs(norm - T{ 1.0 }) < tol); // projected to unit circle
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(l1_ball_constraint_test, T, all_float_types)
+{
+  using std::abs;
+  std::vector<T> x = { T{ 3.0 }, T{ 4.0 } }; // L1 norm = 7
+
+  bopt::l1_ball_constraint<std::vector<T>, T> proj(2.0);
+  proj(x);
+
+  const T tol = 100 * std::numeric_limits<T>::epsilon();
+  T norm1 = abs(x[0]) + abs(x[1]);
+  BOOST_TEST(abs(norm1 - T{ 2.0 }) < tol);
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(simplex_constraint_test, T, all_float_types)
+{
+  using std::abs;
+  std::vector<T> x = { T{ -1.0 }, T{ 2.0 }, T{ 3.0 } }; // negative, sum != 1
+
+  bopt::simplex_constraint<std::vector<T>, T> proj;
+  proj(x);
+
+  const T tol = 100 * std::numeric_limits<T>::epsilon();
+  T sum = 0.0;
+  for (auto& xi : x) {
+    BOOST_TEST(xi >= 0.0); // all nonnegative
+    sum += xi;
+  }
+  BOOST_TEST(abs(sum - T{ 1.0 }) < tol); // normalized to sum=1
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(unit_sphere_constraint_test, T, all_float_types)
+{
+  using std::abs;
+  using std::sqrt;
+  std::vector<T> x = { T{ 0.3 }, T{ 0.4 } }; // norm = 0.5
+  bopt::unit_sphere_constraint<std::vector<T>, T> proj;
+  proj(x);
+
+  const T tol = 100 * std::numeric_limits<T>::epsilon();
+  T norm = sqrt(x[0] * x[0] + x[1] * x[1]);
+  BOOST_TEST(abs(norm - T{ 1.0 }) < tol); // always projected to sphere
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(function_constraint_test, T, all_float_types)
+{
+  auto clip_to_half = [](std::vector<T>& v) {
+    for (auto& xi : v)
+      if (xi > 0.5)
+        xi = 0.5;
+  };
+
+  bopt::function_constraint<std::vector<T>> proj(clip_to_half);
+  std::vector<T> x = { T{ 0.2 }, T{ 0.7 }, T{ 1.5 } };
+  proj(x);
+
+  BOOST_TEST(x == std::vector<T>({ T{ 0.2 }, T{ 0.5 }, T{ 0.5 } }));
+}
+
+template <typename RealType>
+struct no_init_policy
+{
+  void operator()(std::vector<RealType>& /*x*/) const noexcept {}
+};
+
+template <typename RealType>
+struct analytic_objective_eval_pol
+{
+  template <typename Objective, typename ArgumentContainer>
+  RealType operator()(Objective&& objective, ArgumentContainer& x)
+  {
+    return objective(x);
+  }
+};
+
+template <typename RealType>
+struct analytic_gradient_eval_pol
+{
+  // gradient of quadratic(x) = sum(x_i^2) is 2 * x
+  template <typename Objective,
+            typename ArgumentContainer,
+            typename FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& grad_container)
+  {
+    RealType v = f_eval_pol(obj_f, x);
+    obj_v = v;
+    grad_container.resize(x.size());
+    for (size_t i = 0; i < x.size(); ++i) {
+      grad_container[i] = 2 * x[i];
+    }
+  }
+};
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_derivative_policies, T, all_float_types)
+{
+  std::vector<T> x;
+  size_t NITER = 2000;
+  size_t N = 15;
+  T lr = T{ 1e-2 };
+  RandomSample<T> rng{ T(-100), T(100) };
+  T eps = T{ 1e-3 };
+  for (size_t i = 0; i < N; ++i) {
+    x.push_back(rng.next());
+  }
+
+  auto gdopt = bopt::make_gradient_descent(&quadratic<T>,
+                                           x,
+                                           lr,
+                                           no_init_policy<T>{},
+                                           analytic_objective_eval_pol<T>{},
+                                           analytic_gradient_eval_pol<T>{});
+
+  for (size_t i = 0; i < NITER; ++i) {
+    gdopt.step();
+  }
+  for (auto& xi : x) {
+    BOOST_REQUIRE_SMALL(xi, eps);
+  }
+}
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/test_lbfgs.cpp b/test/test_lbfgs.cpp
new file mode 100644
index 0000000000..c93dd266e0
--- /dev/null
+++ b/test/test_lbfgs.cpp
@@ -0,0 +1,119 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#include "test_autodiff_reverse.hpp"
+#include "test_functions_for_optimization.hpp"
+#include <algorithm>
+#include <array>
+#include <boost/math/optimization/lbfgs.hpp>
+#include <boost/math/optimization/minimizer.hpp>
+#include <vector>
+
+namespace rdiff = boost::math::differentiation::reverse_mode;
+namespace bopt = boost::math::optimization;
+
+BOOST_AUTO_TEST_SUITE(basic_lbfgs)
+
+BOOST_AUTO_TEST_CASE(default_lbfgs_test) //, T, all_float_types)
+{
+  using T = double;
+  constexpr size_t M = 10;
+  const T eps = T{ 1e-8 };
+
+  RandomSample<T> rng{ T(-10), T(10) };
+  std::array<rdiff::rvar<T>, 2> x;
+  x[0] = rng.next();
+  x[1] = rng.next();
+
+  auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T>>, x, M);
+
+  auto result = minimize(opt);
+  for (auto& xi : x) {
+    BOOST_REQUIRE_CLOSE(xi.item(), T{ 1.0 }, eps);
+  }
+}
+
+// Custom initialization policy that zeros out the parameters
+template <typename RealType>
+struct zero_init_policy
+{
+  void operator()(std::vector<RealType>& x) const noexcept
+  {
+    std::fill(x.begin(), x.end(), RealType{ 0 });
+  }
+};
+
+template <typename RealType>
+struct analytic_objective_eval_pol
+{
+  template <typename Objective, typename ArgumentContainer>
+  RealType operator()(Objective&& objective, ArgumentContainer& x)
+  {
+    return objective(x);
+  }
+};
+template <typename RealType>
+struct analytic_gradient_eval_pol
+{
+  // gradient of quadratic(x) = sum(x_i^2) is 2 * x
+  template <typename Objective,
+            typename ArgumentContainer,
+            typename FunctionEvaluationPolicy>
+  void operator()(Objective&& obj_f,
+                  ArgumentContainer& x,
+                  FunctionEvaluationPolicy&& f_eval_pol,
+                  RealType& obj_v,
+                  std::vector<RealType>& grad_container)
+  {
+    RealType v = f_eval_pol(obj_f, x);
+    obj_v = v;
+    grad_container.resize(x.size());
+    for (size_t i = 0; i < x.size(); ++i) {
+      grad_container[i] = 2 * x[i];
+    }
+  }
+};
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(custom_init_lbfgs_test, T, all_float_types)
+{
+  constexpr size_t M = 8;
+  const T eps = T{ 1e-6 };
+
+  RandomSample<T> rng{ T(-5), T(5) };
+  std::array<rdiff::rvar<T>, 2> x;
+  x[0] = rng.next();
+  x[1] = rng.next();
+
+  auto opt = bopt::make_lbfgs(&rosenbrock_saddle<rdiff::rvar<T>>,
+                              x,
+                              M,
+                              bopt::costant_initializer_rvar<T>(0.0));
+  auto result = minimize(opt);
+
+  for (auto& xi : x) {
+    BOOST_REQUIRE_CLOSE(xi.item(), T{ 1.0 }, eps);
+  }
+}
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(analytic_lbfgs_test, T, all_float_types)
+{
+  constexpr size_t M = 10;
+  const T eps = T{ 1e-3 };
+
+  RandomSample<T> rng{ T(-5), T(5) };
+  std::vector<T> x(3);
+  for (auto& xi : x)
+    xi = rng.next();
+
+  auto opt = bopt::make_lbfgs(&quadratic<T>,                    // Objective
+                              x,                                // Arguments
+                              M,                                // History size
+                              zero_init_policy<T>{},            // Initialization
+                              analytic_objective_eval_pol<T>{}, // Function eval
+                              analytic_gradient_eval_pol<T>{},  // Gradient eval
+                              bopt::armijo_line_search_policy<T>{});
+
+  auto result = minimize(opt);
+
+  for (auto& xi : x) {
+    BOOST_REQUIRE_SMALL(xi, eps);
+  }
+}
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/test_nesterov_optimizer.cpp b/test/test_nesterov_optimizer.cpp
new file mode 100644
index 0000000000..83e43db25e
--- /dev/null
+++ b/test/test_nesterov_optimizer.cpp
@@ -0,0 +1,30 @@
+// Copyright Maksym Zhelyenzyakov 2025-2026.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// https://www.boost.org/LICENSE_1_0.txt)
+#include "test_autodiff_reverse.hpp" // reuse for some basic options
+#include "test_functions_for_optimization.hpp"
+#include <boost/math/optimization/minimizer.hpp>
+#include <boost/math/optimization/nesterov.hpp>
+#include <vector>
+namespace rdiff = boost::math::differentiation::reverse_mode;
+namespace bopt = boost::math::optimization;
+BOOST_AUTO_TEST_SUITE(nesterov_descent)
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(default_nesterov_test, T, all_float_types)
+{
+  T lr = T{ 1e-4 };
+  T mu = T{ 0.95 };
+  RandomSample<T> rng{ T(-10), T(10) };
+  std::vector<rdiff::rvar<T>> x;
+  x.push_back(rng.next());
+  x.push_back(rng.next());
+  T eps = T{ 1e-8 };
+  auto nag =
+    bopt::make_nag(&quadratic_high_cond_2D<rdiff::rvar<T>>, x, lr, mu);
+  auto z = minimize(nag);
+  for (auto& xi : x) {
+    BOOST_REQUIRE_SMALL(xi.item(), eps);
+  }
+}
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/test_reverse_mode_autodiff_flat_linear_allocator.cpp b/test/test_reverse_mode_autodiff_flat_linear_allocator.cpp
index a2c2cfec84..5bb2c9b2d2 100644
--- a/test/test_reverse_mode_autodiff_flat_linear_allocator.cpp
+++ b/test/test_reverse_mode_autodiff_flat_linear_allocator.cpp
@@ -6,113 +6,129 @@
 #include <vector>
 BOOST_AUTO_TEST_SUITE(test_flat_linear_allocator)
-BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_constructors, T, all_float_types)
+BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_constructors,
+                              T,
+                              all_float_types)
 {
-    size_t buffer_size = 16;
-    RandomSample<T> rng{-1, 1};
-    rdiff_detail::flat_linear_allocator<T> float_allocator{};
-    for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
-        float_allocator.emplace_back(rng.next());
-    }
-
-    BOOST_CHECK_EQUAL(float_allocator.size(),
-                      static_cast<size_t>(2 * buffer_size - buffer_size / 2));
-    BOOST_CHECK_EQUAL(float_allocator.capacity(), static_cast<size_t>(2 * buffer_size));
-
-    float_allocator.clear();
-    BOOST_CHECK_EQUAL(float_allocator.size(), static_cast<size_t>(0));
-    BOOST_CHECK_EQUAL(float_allocator.capacity(), buffer_size);
-
-    for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
-        float_allocator.emplace_back(rng.next());
-    }
-    float_allocator.reset();
-    BOOST_CHECK_EQUAL(float_allocator.size(), static_cast<size_t>(0));
-    BOOST_CHECK_EQUAL(float_allocator.capacity(), 2 * buffer_size);
-
-    for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
-        float_allocator.emplace_back(rng.next());
-    }
-    T fill_value = T(0.25);
-    float_allocator.fill(fill_value);
-    for (size_t i = 0; i < float_allocator.size(); i++) {
-        BOOST_CHECK_EQUAL(float_allocator[i], fill_value);
-    }
+  size_t buffer_size = 16;
+  RandomSample<T> rng{ -1, 1 };
+  rdiff_detail::flat_linear_allocator<T> float_allocator{};
+  for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
+    float_allocator.emplace_back(rng.next());
+  }
+
+  BOOST_CHECK_EQUAL(float_allocator.size(),
+                    static_cast<size_t>(2 * buffer_size - buffer_size / 2));
+  BOOST_CHECK_EQUAL(float_allocator.capacity(),
+                    static_cast<size_t>(2 * buffer_size));
+
+  float_allocator.clear();
+  BOOST_CHECK_EQUAL(float_allocator.size(), static_cast<size_t>(0));
+  BOOST_CHECK_EQUAL(float_allocator.capacity(), buffer_size);
+
+  for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
+    float_allocator.emplace_back(rng.next());
+  }
+  float_allocator.reset();
+  BOOST_CHECK_EQUAL(float_allocator.size(), static_cast<size_t>(0));
+  BOOST_CHECK_EQUAL(float_allocator.capacity(), 2 * buffer_size);
+
+  for (size_t i = 0; i < 2 * buffer_size - buffer_size / 2; i++) {
+    float_allocator.emplace_back(rng.next());
+  }
+  T fill_value = T(0.25);
+  float_allocator.fill(fill_value);
+  for (size_t i = 0; i < float_allocator.size(); i++) {
+    BOOST_CHECK_EQUAL(float_allocator[i], fill_value);
+  }
 }
-BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_test_emplace, T, all_float_types)
+BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_test_emplace,
+                              T,
+                              all_float_types)
 {
-    size_t buffer_size = 16;
-    RandomSample<T> rng{-1, 1};
-    rdiff_detail::flat_linear_allocator<T> float_allocator{};
-    std::vector<T> test_vector;
-
-    for (size_t i = 0; i < 2 * buffer_size - 1; i++) {
-        test_vector.push_back(rng.next());
-        float_allocator.emplace_back(test_vector[i]);
-    }
-
-    auto it1 = float_allocator.template emplace_back_n<4>();
-    for (size_t i = 0; i < 4; i++) {
-        T literal = rng.next();
-        test_vector.push_back(literal);
-        *(it1 + i) = literal;
-    }
-
-    auto it2 = float_allocator.emplace_back_n(buffer_size);
-    for (size_t i = 0; i < buffer_size; i++) {
-        T literal = rng.next();
-        test_vector.push_back(literal);
-        *(it2 + i) = literal;
-    }
-
-    auto vit = test_vector.begin();
-    auto alloc_it = float_allocator.begin();
-    for (; vit != test_vector.end(); vit++, alloc_it++) {
-        BOOST_CHECK_EQUAL(
-            *vit,
-            *alloc_it); // should be ok to check floats like this since they are expected to be the same.
-    }
-
-    for (size_t i = 0; i < test_vector.size(); i++) {
-        BOOST_CHECK_EQUAL(test_vector[i], float_allocator[i]); // check random access aswell;
-    }
-
-    BOOST_CHECK_EQUAL(test_vector.size(), float_allocator.size());
+  size_t buffer_size = 16;
+  RandomSample<T> rng{ -1, 1 };
+  rdiff_detail::flat_linear_allocator<T> float_allocator{};
+  std::vector<T> test_vector;
+
+  for (size_t i = 0; i < 2 * buffer_size - 1; i++) {
+    test_vector.push_back(rng.next());
+    float_allocator.emplace_back(test_vector[i]);
+  }
+
+  auto it1 = float_allocator.template emplace_back_n<4>();
+  for (size_t i = 0; i < 4; i++) {
+    T literal = rng.next();
+    test_vector.push_back(literal);
+    *(it1 + i) = literal;
+  }
+
+  auto it2 = float_allocator.emplace_back_n(buffer_size);
+  for (size_t i = 0; i < buffer_size; i++) {
+    T literal = rng.next();
+    test_vector.push_back(literal);
+    *(it2 + i) = literal;
+  }
+
+  auto vit = test_vector.begin();
+  auto alloc_it = float_allocator.begin();
+  for (; vit != test_vector.end(); vit++, alloc_it++) {
+    BOOST_CHECK_EQUAL(*vit,
+                      *alloc_it); // should be ok to check floats like this
+                                  // since they are expected to be the same.
+  }
+
+  for (size_t i = 0; i < test_vector.size(); i++) {
+    BOOST_CHECK_EQUAL(test_vector[i],
+                      float_allocator[i]); // check random access as well
+  }
+
+  BOOST_CHECK_EQUAL(test_vector.size(), float_allocator.size());
 }
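+
+// In the test below, add_checkpoint() records the current end of the tape,
+// i.e. the position one past the most recently emplaced element; the
+// checkpoint iterators are therefore decremented once before dereferencing
+// to recover the value that was live when the checkpoint was taken.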
-BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_test_checkpointing, T, all_float_types)
+
+BOOST_AUTO_TEST_CASE_TEMPLATE(flat_linear_allocator_test_checkpointing,
+                              T,
+                              all_float_types)
 {
-    size_t buffer_size = 16;
-    RandomSample<T> rng{-1, 1};
-    rdiff_detail::flat_linear_allocator<T> float_allocator{};
-    std::vector<T> test_vector;
-    std::vector<size_t> checkpoint_indices{2, 11, 15, 16, 17, 28};
-    std::vector<T> expected_value_at_checkpoint;
-
-    size_t ckp_id = 0;
-    for (size_t i = 0; i < 2 * buffer_size; i++) {
-        T literal = rng.next();
-        float_allocator.emplace_back(literal);
-        if (ckp_id < checkpoint_indices.size() && i == checkpoint_indices[ckp_id]) {
-            float_allocator.add_checkpoint();
-            expected_value_at_checkpoint.push_back(literal);
-            ++ckp_id;
-        }
-    }
-    for (size_t i = 0; i < checkpoint_indices.size(); i++) {
-        auto it = float_allocator.checkpoint_at(i);
-        BOOST_CHECK_EQUAL(*it, expected_value_at_checkpoint[i]);
-    }
-
-    auto first_ckp = float_allocator.first_checkpoint();
-    auto last_ckp = float_allocator.last_checkpoint();
-
-    BOOST_CHECK_EQUAL(*first_ckp, expected_value_at_checkpoint[0]);
-    BOOST_CHECK_EQUAL(*last_ckp, expected_value_at_checkpoint.back());
-
-    float_allocator.rewind_to_last_checkpoint();
-    BOOST_CHECK_EQUAL(float_allocator.size(), checkpoint_indices.back());
-    BOOST_CHECK_EQUAL(float_allocator.capacity(), 2 * buffer_size);
+  constexpr size_t buffer_size = 16;
+  RandomSample<T> rng{ -1, 1 };
+  rdiff_detail::flat_linear_allocator<T> float_allocator{};
+  std::vector<size_t> checkpoint_indices{ 2, 11, 15, 16, 17, 28 };
+  std::vector<T> expected_value_at_checkpoint;
+
+  size_t ckp_id = 0;
+  for (size_t i = 0; i < 2 * buffer_size; ++i) {
+    T literal = rng.next();
+    float_allocator.emplace_back(literal);
+
+    if (ckp_id < checkpoint_indices.size() &&
+        (i + 1) == checkpoint_indices[ckp_id]) {
+      float_allocator.add_checkpoint();
+      expected_value_at_checkpoint.push_back(literal);
+      ++ckp_id;
+    }
+  }
+
+  for (size_t i = 0; i < checkpoint_indices.size(); ++i) {
+    auto it = float_allocator.checkpoint_at(i);
+    BOOST_REQUIRE(it != float_allocator.begin());
+    --it;
+    BOOST_CHECK_EQUAL(*it, expected_value_at_checkpoint[i]);
+  }
+
+  auto first_ckp = float_allocator.first_checkpoint();
+  BOOST_REQUIRE(first_ckp != float_allocator.begin());
+  --first_ckp;
+  BOOST_CHECK_EQUAL(*first_ckp, expected_value_at_checkpoint.front());
+
+  auto last_ckp = float_allocator.last_checkpoint();
+  BOOST_REQUIRE(last_ckp != float_allocator.begin());
+  --last_ckp;
+  BOOST_CHECK_EQUAL(*last_ckp, expected_value_at_checkpoint.back());
+
+  float_allocator.rewind_to_last_checkpoint();
+  BOOST_CHECK_EQUAL(float_allocator.size(), checkpoint_indices.back());
+  BOOST_CHECK_EQUAL(float_allocator.capacity(), 2 * buffer_size);
 }
-
 BOOST_AUTO_TEST_SUITE_END()