diff --git a/Jenkinsfile b/Jenkinsfile index cb3217f4025..163e8def66a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -150,7 +150,7 @@ pipeline { stage('Verify changes') { agent { label 'linux' } steps { - script { + script { retry(3) { checkout scm } sh 'git clean -xffd' @@ -240,6 +240,7 @@ pipeline { steps { deleteDir() unstash 'MathSetup' + sh "export STAN_NUM_THREADS=2" sh "echo CXX=${env.CXX} -Werror > make/local" sh "echo CPPFLAGS+=-DSTAN_THREADS >> make/local" runTests("test/unit -f thread") @@ -263,6 +264,7 @@ pipeline { steps { deleteDirWin() unstash 'MathSetup' + bat "setx STAN_NUM_THREADS 2" bat "echo CXX=${env.CXX} -Werror > make/local" bat "echo CXXFLAGS+=-DSTAN_THREADS >> make/local" runTestsWin("test/unit -f thread") @@ -272,7 +274,7 @@ pipeline { } } stage('Additional merge tests') { - when { + when { allOf { anyOf { branch 'develop' @@ -309,12 +311,12 @@ pipeline { } } stage('Upstream tests') { - when { + when { allOf { - expression { - env.BRANCH_NAME ==~ /PR-\d+/ + expression { + env.BRANCH_NAME ==~ /PR-\d+/ } - expression { + expression { !skipRemainingStages } } diff --git a/make/compiler_flags b/make/compiler_flags index 61adb29ddab..d2b13b975f4 100644 --- a/make/compiler_flags +++ b/make/compiler_flags @@ -271,7 +271,7 @@ print-compiler-flags: @echo ' - SUNDIALS ' $(SUNDIALS) @echo ' - TBB ' $(TBB) @echo ' - GTEST ' $(GTEST) - @echo ' - STAN_THREADS ' $(STAN_THREADS) + @echo ' - STAN_THREADS ' $(STAN_THREADS) @echo ' - STAN_OPENCL ' $(STAN_OPENCL) @echo ' - STAN_MPI ' $(STAN_MPI) @echo ' Compiler flags (each can be overriden separately):' diff --git a/stan/math/prim/functor.hpp b/stan/math/prim/functor.hpp index bfa72de41af..a029e13f4bb 100644 --- a/stan/math/prim/functor.hpp +++ b/stan/math/prim/functor.hpp @@ -18,5 +18,7 @@ #include #include #include +#include +#include #endif diff --git a/stan/math/prim/functor/reduce_sum.hpp b/stan/math/prim/functor/reduce_sum.hpp new file mode 100644 index 00000000000..25dc700b8eb --- /dev/null +++ b/stan/math/prim/functor/reduce_sum.hpp @@ -0,0 +1,272 @@ +#ifndef STAN_MATH_PRIM_FUNCTOR_REDUCE_SUM_HPP +#define STAN_MATH_PRIM_FUNCTOR_REDUCE_SUM_HPP + +#include + +#include +#include +#include + +#include +#include + +namespace stan { +namespace math { + +namespace internal { + +/** + * reduce_sum_impl implementation for any autodiff type. + * + * @tparam ReduceFunction Type of reducer function + * @tparam ReturnType An arithmetic type + * @tparam Vec Type of sliced argument + * @tparam Args Types of shared arguments + */ +template +struct reduce_sum_impl { + /** + * Call an instance of the function `ReduceFunction` on every element + * of an input sequence and sum these terms. + * + * This specialization is not parallelized and works for any autodiff types. + * + * An instance, f, of `ReduceFunction` should have the signature: + * T f(int start, int end, Vec&& vmapped_subset, std::ostream* msgs, + * Args&&... args) + * + * `ReduceFunction` must be default constructible without any arguments + * + * Each call to `ReduceFunction` is responsible for computing the + * start through end terms (inclusive) of the overall sum. All args are + * passed from this function through to the `ReduceFunction` instances. + * However, only the start through end (inclusive) elements of the vmapped + * argument are passed to the `ReduceFunction` instances (as the + * `vmapped_subset` argument). + * + * If auto partitioning is true, do the calculation with one + * ReduceFunction call. If false, break work into pieces strictly smaller + * than grainsize. + * + * grainsize must be greater than or equal to 1 + * + * @param vmapped Sliced arguments used only in some sum terms + * @param auto_partitioning Work partitioning style (ignored) + * @param grainsize Suggested grainsize for tbb + * @param[in, out] msgs The print stream for warning messages + * @param args Shared arguments used in every sum term + * @return Summation of all terms + */ + return_type_t operator()(Vec&& vmapped, bool auto_partitioning, + int grainsize, std::ostream* msgs, + Args&&... args) const { + const std::size_t num_jobs = vmapped.size(); + + if (num_jobs == 0) { + return 0.0; + } + + if (auto_partitioning) { + return ReduceFunction()(0, vmapped.size() - 1, std::forward(vmapped), + msgs, std::forward(args)...); + } else { + return_type_t sum = 0.0; + for (size_t i = 0; i < (vmapped.size() + grainsize - 1) / grainsize; + ++i) { + size_t start = i * grainsize; + size_t end = std::min((i + 1) * grainsize, vmapped.size()) - 1; + + std::decay_t sub_slice; + sub_slice.reserve(end - start + 1); + for (int i = start; i <= end; ++i) { + sub_slice.emplace_back(vmapped[i]); + } + + sum += ReduceFunction()(start, end, std::forward(sub_slice), msgs, + std::forward(args)...); + } + return sum; + } + } +}; + +/** + * Specialization of reduce_sum_impl for arithmetic types + * + * @tparam ReduceFunction Type of reducer function + * @tparam ReturnType An arithmetic type + * @tparam Vec Type of sliced argument + * @tparam Args Types of shared arguments + */ +template +struct reduce_sum_impl, + ReturnType, Vec, Args...> { + /** + * Internal object meeting the Imperative form requirements of + * `tbb::parallel_reduce` + * + * @note see link [here](https://tinyurl.com/vp7xw2t) for requirements. + */ + struct recursive_reducer { + Vec vmapped_; + std::ostream* msgs_; + std::tuple args_tuple_; + return_type_t sum_{0.0}; + + recursive_reducer(Vec&& vmapped, std::ostream* msgs, Args&&... args) + : vmapped_(std::forward(vmapped)), + msgs_(msgs), + args_tuple_(std::forward(args)...) {} + + /* + * This is the copy operator as required for tbb::parallel_reduce + * Imperative form. This requires sum_ be reset to zero. + */ + recursive_reducer(recursive_reducer& other, tbb::split) + : vmapped_(other.vmapped_), + msgs_(other.msgs_), + args_tuple_(other.args_tuple_) {} + + /** + * Compute the value and of `ReduceFunction` over the range defined by r + * and accumulate those in member variable sum_. This function may + * be called multiple times per object instantiation (so the sum_ + * must be accumulated, not just assigned). + * + * @param r Range over which to compute `ReduceFunction` + */ + void operator()(const tbb::blocked_range& r) { + if (r.empty()) { + return; + } + + std::decay_t sub_slice; + sub_slice.reserve(r.size()); + for (int i = r.begin(); i < r.end(); ++i) { + sub_slice.emplace_back(vmapped_[i]); + } + + sum_ += apply( + [&](auto&&... args) { + return ReduceFunction()(r.begin(), r.end() - 1, sub_slice, msgs_, + args...); + }, + this->args_tuple_); + } + + /** + * Join reducers. Accumuluate the value (sum_) of the other reducer. + * + * @param rhs Another partial sum + */ + void join(const recursive_reducer& child) { this->sum_ += child.sum_; } + }; + + /** + * Call an instance of the function `ReduceFunction` on every element + * of an input sequence and sum these terms. + * + * This specialization is parallelized using tbb and works only for + * arithmetic types. + * + * An instance, f, of `ReduceFunction` should have the signature: + * double f(int start, int end, Vec&& vmapped_subset, std::ostream* msgs, + * Args&&... args) + * + * `ReduceFunction` must be default constructible without any arguments + * + * Each call to `ReduceFunction` is responsible for computing the + * start through end (inclusive) terms of the overall sum. All args are + * passed from this function through to the `ReduceFunction` instances. + * However, only the start through end (inclusive) elements of the vmapped + * argument are passed to the `ReduceFunction` instances (as the + * `vmapped_subset` argument). + * + * This function distributes computation of the desired sum + * over multiple threads by coordinating calls to `ReduceFunction` + * instances. + * + * If auto partitioning is true, break work into pieces automatically, + * taking grainsize as a recommended work size (this process + * is not deterministic). If false, break work deterministically + * into pieces smaller than or equal to grainsize. The execution + * order is non-deterministic. + * + * grainsize must be greater than or equal to 1 + * + * @param vmapped Sliced arguments used only in some sum terms + * @param auto_partitioning Work partitioning style + * @param grainsize Suggested grainsize for tbb + * @param[in, out] msgs The print stream for warning messages + * @param args Shared arguments used in every sum term + * @return Summation of all terms + */ + ReturnType operator()(Vec&& vmapped, bool auto_partitioning, int grainsize, + std::ostream* msgs, Args&&... args) const { + const std::size_t num_jobs = vmapped.size(); + if (num_jobs == 0) { + return 0.0; + } + recursive_reducer worker(std::forward(vmapped), msgs, + std::forward(args)...); + + if (auto_partitioning) { + tbb::parallel_reduce( + tbb::blocked_range(0, num_jobs, grainsize), worker); + } else { + tbb::simple_partitioner partitioner; + tbb::parallel_deterministic_reduce( + tbb::blocked_range(0, num_jobs, grainsize), worker, + partitioner); + } + + return worker.sum_; + } +}; + +} // namespace internal + +/** + * Call an instance of the function `ReduceFunction` on every element + * of an input sequence and sum these terms. + * + * This defers to reduce_sum_impl for the appropriate implementation + * + * An instance, f, of `ReduceFunction` should have the signature: + * T f(int start, int end, Vec&& vmapped_subset, std::ostream* msgs, Args&&... + * args) + * + * `ReduceFunction` must be default constructible without any arguments + * + * grainsize must be greater than or equal to 1 + * + * @tparam ReduceFunction Type of reducer function + * @tparam ReturnType An arithmetic type + * @tparam Vec Type of sliced argument + * @tparam Args Types of shared arguments + * @param vmapped Sliced arguments used only in some sum terms + * @param grainsize Suggested grainsize for tbb + * @param[in, out] msgs The print stream for warning messages + * @param args Shared arguments used in every sum term + * @return Sum of terms + */ +template , typename... Args> +auto reduce_sum(Vec&& vmapped, int grainsize, std::ostream* msgs, + Args&&... args) { + using return_type = return_type_t; + + check_positive("reduce_sum", "grainsize", grainsize); + + return internal::reduce_sum_impl()(std::forward(vmapped), true, + grainsize, msgs, + std::forward(args)...); +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/prim/functor/reduce_sum_static.hpp b/stan/math/prim/functor/reduce_sum_static.hpp new file mode 100644 index 00000000000..5b13701df93 --- /dev/null +++ b/stan/math/prim/functor/reduce_sum_static.hpp @@ -0,0 +1,57 @@ +#ifndef STAN_MATH_PRIM_FUNCTOR_REDUCE_SUM_STATIC_HPP +#define STAN_MATH_PRIM_FUNCTOR_REDUCE_SUM_STATIC_HPP + +#include + +#include +#include +#include + +#include +#include + +namespace stan { +namespace math { + +/** + * Call an instance of the function `ReduceFunction` on every element + * of an input sequence and sum these terms. + * + * This defers to reduce_sum_impl for the appropriate implementation + * + * An instance, f, of `ReduceFunction` should have the signature: + * T f(int start, int end, Vec&& vmapped_subset, std::ostream* msgs, Args&&... + * args) + * + * `ReduceFunction` must be default constructible without any arguments + * + * grainsize must be greater than or equal to 1 + * + * @tparam ReduceFunction Type of reducer function + * @tparam ReturnType An arithmetic type + * @tparam Vec Type of sliced argument + * @tparam Args Types of shared arguments + * @param vmapped Sliced arguments used only in some sum terms + * @param grainsize Suggested grainsize for tbb + * @param[in, out] msgs The print stream for warning messages + * @param args Shared arguments used in every sum term + * @return Sum of terms + */ +template , typename... Args> +auto reduce_sum_static(Vec&& vmapped, int grainsize, std::ostream* msgs, + Args&&... args) { + using return_type = return_type_t; + + check_positive("reduce_sum", "grainsize", grainsize); + + return internal::reduce_sum_impl()(std::forward(vmapped), false, + grainsize, msgs, + std::forward(args)...); +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/prim/meta/broadcast_array.hpp b/stan/math/prim/meta/broadcast_array.hpp index 3c92f4e139b..41c44b7d954 100644 --- a/stan/math/prim/meta/broadcast_array.hpp +++ b/stan/math/prim/meta/broadcast_array.hpp @@ -13,12 +13,14 @@ namespace internal { template class broadcast_array { private: - T& prim_; + using T_decay = std::decay_t; + T_decay& prim_; public: explicit broadcast_array(T& prim) : prim_(prim) {} - T& operator[](int /*i*/) { return prim_; } + auto& operator[](int /*i*/) { return prim_; } + auto& operator[](int /*i*/) const { return prim_; } /** \ingroup type_trait * We can assign any right hand side which allows for indexing to a @@ -30,22 +32,33 @@ class broadcast_array { void operator=(const Y& m) { prim_ = m[0]; } + broadcast_array& operator=(const broadcast_array& other) = default; + broadcast_array& operator=(broadcast_array&& other) = default; }; template class empty_broadcast_array { + using T_decay = std::decay_t; + public: empty_broadcast_array() {} /** \ingroup type_trait * Not implemented so cannot be called. */ - T& operator[](int /*i*/); + T_decay& operator[](int /*i*/); + T_decay& operator[](int /*i*/) const; - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ template void operator=(const Y& /*A*/); + + /** + * Not implemented so cannot be called. + */ + template + void operator+=(R); }; template @@ -55,31 +68,31 @@ class empty_broadcast_array> { public: empty_broadcast_array() {} - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ ViewElt& operator[](int /*i*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ ViewElt& operator()(int /*i*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ void operator=(const T_arg& /*A*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ void operator+=(T_arg /*A*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ void operator-=(T_arg /*A*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ T& row(int /*i*/); - /** \ingroup type_trait + /** * Not implemented so cannot be called. */ T& col(int /*i*/); diff --git a/stan/math/prim/meta/operands_and_partials.hpp b/stan/math/prim/meta/operands_and_partials.hpp index 19d4d0febab..b553b9ee0a6 100644 --- a/stan/math/prim/meta/operands_and_partials.hpp +++ b/stan/math/prim/meta/operands_and_partials.hpp @@ -2,7 +2,6 @@ #define STAN_MATH_PRIM_META_OPERANDS_AND_PARTIALS_HPP #include -#include #include #include #include diff --git a/stan/math/rev/core.hpp b/stan/math/rev/core.hpp index 33eb934c08e..b7f1aeed314 100644 --- a/stan/math/rev/core.hpp +++ b/stan/math/rev/core.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/rev/functor.hpp b/stan/math/rev/functor.hpp index 0af65fe88e5..13566d8b8bc 100644 --- a/stan/math/rev/functor.hpp +++ b/stan/math/rev/functor.hpp @@ -20,5 +20,6 @@ #include #include #include +#include #endif diff --git a/stan/math/rev/functor/reduce_sum.hpp b/stan/math/rev/functor/reduce_sum.hpp new file mode 100644 index 00000000000..f6ba97a7e42 --- /dev/null +++ b/stan/math/rev/functor/reduce_sum.hpp @@ -0,0 +1,243 @@ +#ifndef STAN_MATH_REV_SCAL_FUNCTOR_REDUCE_SUM_HPP +#define STAN_MATH_REV_SCAL_FUNCTOR_REDUCE_SUM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace stan { +namespace math { +namespace internal { + +/** + * Var specialization of reduce_sum_impl + * + * @tparam ReduceFunction Type of reducer function + * @tparam ReturnType Must be var + * @tparam Vec Type of sliced argument + * @tparam Args Types of shared arguments + */ +template +struct reduce_sum_impl, ReturnType, + Vec, Args...> { + /** + * Internal object meeting the Imperative form requirements of + * `tbb::parallel_reduce` + * + * @note see link [here](https://tinyurl.com/vp7xw2t) for requirements. + */ + struct recursive_reducer { + size_t per_job_sliced_terms_; + size_t num_shared_terms_; // Number of terms shared across threads + double* sliced_partials_; // Points to adjoints of the partial calculations + Vec vmapped_; + std::ostream* msgs_; + std::tuple args_tuple_; + double sum_{0.0}; + Eigen::VectorXd args_adjoints_{0}; + + template + recursive_reducer(size_t per_job_sliced_terms, size_t num_shared_terms, + double* sliced_partials, VecT&& vmapped, + std::ostream* msgs, ArgsT&&... args) + : per_job_sliced_terms_(per_job_sliced_terms), + num_shared_terms_(num_shared_terms), + sliced_partials_(sliced_partials), + vmapped_(std::forward(vmapped)), + msgs_(msgs), + args_tuple_(std::forward(args)...) {} + + /* + * This is the copy operator as required for tbb::parallel_reduce + * Imperative form. This requires the reduced values (sum_ and + * arg_adjoints_) be reset to zero. + */ + recursive_reducer(recursive_reducer& other, tbb::split) + : per_job_sliced_terms_(other.per_job_sliced_terms_), + num_shared_terms_(other.num_shared_terms_), + sliced_partials_(other.sliced_partials_), + vmapped_(other.vmapped_), + msgs_(other.msgs_), + args_tuple_(other.args_tuple_) {} + + /** + * Compute, using nested autodiff, the value and Jacobian of + * `ReduceFunction` called over the range defined by r and accumulate those + * in member variable sum_ (for the value) and args_adjoints_ (for the + * Jacobian). This function may be called multiple times per object + * instantiation (so the sum_ and args_adjoints_ must be accumulated, not + * just assigned). + * + * @param r Range over which to compute reduce_sum + */ + inline void operator()(const tbb::blocked_range& r) { + if (r.empty()) { + return; + } + + if (args_adjoints_.size() == 0) { + args_adjoints_ = Eigen::VectorXd::Zero(this->num_shared_terms_); + } + + // Initialize nested autodiff stack + const nested_rev_autodiff begin_nest; + + // Create nested autodiff copies of sliced argument that do not point + // back to main autodiff stack + std::decay_t local_sub_slice; + local_sub_slice.reserve(r.size()); + for (int i = r.begin(); i < r.end(); ++i) { + local_sub_slice.emplace_back(deep_copy_vars(vmapped_[i])); + } + + // Create nested autodiff copies of all shared arguments that do not point + // back to main autodiff stack + auto args_tuple_local_copy = apply( + [&](auto&&... args) { + return std::tuple( + deep_copy_vars(args)...); + }, + this->args_tuple_); + + // Perform calculation + var sub_sum_v = apply( + [&](auto&&... args) { + return ReduceFunction()(r.begin(), r.end() - 1, local_sub_slice, + this->msgs_, args...); + }, + args_tuple_local_copy); + + // Compute Jacobian + sub_sum_v.grad(); + + // Accumulate value of reduce_sum + sum_ += sub_sum_v.val(); + + // Accumulate adjoints of sliced_arguments + accumulate_adjoints( + this->sliced_partials_ + r.begin() * per_job_sliced_terms_, + local_sub_slice); + + // Accumulate adjoints of shared_arguments + apply( + [&](auto&&... args) { + accumulate_adjoints(args_adjoints_.data(), + std::forward(args)...); + }, + std::move(args_tuple_local_copy)); + } + + /** + * Join reducers. Accumuluate the value (sum_) and Jacobian (arg_adoints_) + * of the other reducer. + * + * @param rhs Another partial sum + */ + void join(const recursive_reducer& rhs) { + this->sum_ += rhs.sum_; + if (this->args_adjoints_.size() != 0 && rhs.args_adjoints_.size() != 0) { + this->args_adjoints_ += rhs.args_adjoints_; + } else if (this->args_adjoints_.size() == 0 + && rhs.args_adjoints_.size() != 0) { + this->args_adjoints_ = rhs.args_adjoints_; + } + } + }; + + /** + * Call an instance of the function `ReduceFunction` on every element + * of an input sequence and sum these terms. + * + * This specialization is parallelized using tbb and works for reverse + * mode autodiff. + * + * An instance, f, of `ReduceFunction` should have the signature: + * var f(int start, int end, Vec&& vmapped_subset, std::ostream* msgs, + * Args&&... args) + * + * `ReduceFunction` must be default constructible without any arguments + * + * Each call to `ReduceFunction` is responsible for computing the + * start through end (inclusive) terms of the overall sum. All args are + * passed from this function through to the `ReduceFunction` instances. + * However, only the start through end (inclusive) elements of the vmapped + * argument are passed to the `ReduceFunction` instances (as the + * `vmapped_subset` argument). + * + * This function distributes computation of the desired sum and the Jacobian + * of that sum over multiple threads by coordinating calls to `ReduceFunction` + * instances. Results are stored as precomputed varis in the autodiff tree. + * + * If auto partitioning is true, break work into pieces automatically, + * taking grainsize as a recommended work size (this process + * is not deterministic). If false, break work deterministically + * into pieces smaller than or equal to grainsize. The execution + * order is non-deterministic. + * + * @param vmapped Sliced arguments used only in some sum terms + * @param auto_partitioning Work partitioning style + * @param grainsize Suggested grainsize for tbb + * @param[in, out] msgs The print stream for warning messages + * @param args Shared arguments used in every sum term + * @return Summation of all terms + */ + inline var operator()(Vec&& vmapped, bool auto_partitioning, int grainsize, + std::ostream* msgs, Args&&... args) const { + const std::size_t num_jobs = vmapped.size(); + + if (num_jobs == 0) { + return var(0.0); + } + + const std::size_t per_job_sliced_terms = count_vars(vmapped[0]); + const std::size_t num_sliced_terms = num_jobs * per_job_sliced_terms; + const std::size_t num_shared_terms = count_vars(args...); + + vari** varis = ChainableStack::instance_->memalloc_.alloc_array( + num_sliced_terms + num_shared_terms); + double* partials = ChainableStack::instance_->memalloc_.alloc_array( + num_sliced_terms + num_shared_terms); + + for (size_t i = 0; i < num_sliced_terms; ++i) { + partials[i] = 0.0; + } + recursive_reducer worker(per_job_sliced_terms, num_shared_terms, partials, + vmapped, msgs, args...); + + if (auto_partitioning) { + tbb::parallel_reduce( + tbb::blocked_range(0, num_jobs, grainsize), worker); + } else { + tbb::simple_partitioner partitioner; + tbb::parallel_deterministic_reduce( + tbb::blocked_range(0, num_jobs, grainsize), worker, + partitioner); + } + + save_varis(varis, std::forward(vmapped)); + save_varis(varis + num_sliced_terms, std::forward(args)...); + + for (size_t i = 0; i < num_shared_terms; ++i) { + partials[num_sliced_terms + i] = worker.args_adjoints_(i); + } + + return var(new precomputed_gradients_vari( + worker.sum_, num_sliced_terms + num_shared_terms, varis, partials)); + } +}; +} // namespace internal + +} // namespace math +} // namespace stan + +#endif diff --git a/test/unit/math/mix/functor/reduce_sum_static_test.cpp b/test/unit/math/mix/functor/reduce_sum_static_test.cpp new file mode 100644 index 00000000000..844fdc16ebb --- /dev/null +++ b/test/unit/math/mix/functor/reduce_sum_static_test.cpp @@ -0,0 +1,457 @@ +#include +#include + +#include +#include + +std::ostream* msgs = nullptr; + +template * = nullptr> +T sum_(T arg) { + return arg; +} + +template * = nullptr> +auto sum_(EigMat&& arg) { + return stan::math::sum(arg); +} + +template * = nullptr> +auto sum_(Vec&& arg) { + stan::scalar_type_t sum = 0; + for (size_t i = 0; i < arg.size(); ++i) { + sum += sum_(arg[i]); + } + return sum; +} + +struct sum_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T&& sub_slice, + std::ostream* msgs, Args&&... args) const { + using return_type = stan::return_type_t; + + return sum_(sub_slice) + + sub_slice.size() + * stan::math::sum(std::vector{ + return_type(sum_(std::forward(args)))...}); + } +}; + +TEST(MathMix_reduce_sum_static, grainsize) { + auto f1 = [](auto&& data) { + return stan::math::reduce_sum_static(data, 0, msgs); + }; + auto f2 = [](auto&& data) { + return stan::math::reduce_sum_static(data, -1, msgs); + }; + auto f3 = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + auto f4 = [](auto&& data) { + return stan::math::reduce_sum_static(data, 20, msgs); + }; + + std::vector data(5, 10.0); + + stan::test::expect_ad(f1, data); + stan::test::expect_ad(f2, data); + stan::test::expect_ad(f3, data); + stan::test::expect_ad(f4, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector data(5, 10.0); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector> data(3, std::vector(2, 10.0)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector data(3, Eigen::VectorXd::Ones(2)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_row_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector data(3, Eigen::RowVectorXd::Ones(2)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_matrix_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector data(3, Eigen::MatrixXd::Ones(2, 4)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector>> data( + 3, std::vector>(2, std::vector(2, 10.0))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, + std_vector_std_vector_eigen_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::VectorXd::Ones(2))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, + std_vector_std_vector_eigen_row_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum_static, + std_vector_std_vector_eigen_matrix_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum_static(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::MatrixXd::Ones(2, 4))); + + stan::test::expect_ad(f, data); +} + +struct start_end_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T1&&, + std::ostream* msgs, T2&& data) const { + stan::return_type_t sum = 0; + EXPECT_GE(start, 0); + EXPECT_LE(end, data.size() - 1); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMath_reduce_sum_static, start_end_slice) { + auto start_end = [](auto&& arg) { + return stan::math::reduce_sum_static(arg, 1, msgs, arg); + }; + + std::vector data(5, 1.0); + + stan::test::expect_ad(start_end, data); +} + +auto fi = [](auto&&... args) { + return stan::math::reduce_sum_static(std::vector(2, 10.0), 1, + msgs, args...); +}; + +auto fd = [](auto&& data, auto&&... args) { + return stan::math::reduce_sum_static(data, 1, msgs, args...); +}; + +TEST(MathMix_reduce_sum_static, int_arg) { + std::vector data(2, 1.0); + int arg = 5.0; + + stan::test::expect_ad([&](auto&& data) { return fd(data, arg); }, data); +} + +TEST(MathMix_reduce_sum_static, double_arg) { + std::vector data(2, 10.0); + double arg = 5.0; + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_int_arg) { + std::vector data(2, 10.0); + std::vector arg(2, 10); + + stan::test::expect_ad([&](auto&& data) { return fd(data, arg); }, data); +} + +TEST(MathMix_reduce_sum_static, std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector arg(2, 10.0); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, eigen_vector_arg) { + std::vector data(2, 10.0); + Eigen::VectorXd arg = Eigen::VectorXd::Ones(2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, eigen_row_vector_arg) { + std::vector data(2, 10.0); + Eigen::RowVectorXd arg = Eigen::RowVectorXd::Ones(2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, eigen_matrix_arg) { + std::vector data(2, 10.0); + Eigen::MatrixXd arg = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector> arg(2, std::vector(2, 10.0)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_vector_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::VectorXd::Ones(2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_row_vector_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::RowVectorXd::Ones(2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_eigen_matrix_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector>> arg( + 2, std::vector>(2, std::vector(2, 10.0))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_eigen_vector_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::VectorXd::Ones(2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_eigen_row_vector_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, std_vector_std_vector_eigen_matrix_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::MatrixXd::Ones(2, 2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_ints1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_ints2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_ints3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_doubles1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_doubles2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum_static, eigen_three_args_with_doubles3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} + +template +struct static_check_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, + const std::vector&, std::ostream* msgs, + const std::vector& data) const { + T sum = 0; + // std::cout << "start: " << start << ", end: " << end << ", grainsize: " << + // grainsize << std::endl; + EXPECT_LE(end - start + 1, grainsize); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMathPrim_reduce_sum_static, static_check) { + stan::math::init_threadpool_tbb(); + + for (auto size : {1, 3, 6, 11}) { + std::vector data(size, 10); + std::vector arg(size, 10); + + auto fi1 = [&](auto&&... args) { + return stan::math::reduce_sum_static>(data, 1, msgs, + args...); + }; + + auto fi2 = [&](auto&&... args) { + return stan::math::reduce_sum_static>(data, 2, msgs, + args...); + }; + + auto fi3 = [&](auto&&... args) { + return stan::math::reduce_sum_static>(data, 3, msgs, + args...); + }; + + stan::test::expect_ad(fi1, arg); + stan::test::expect_ad(fi2, arg); + stan::test::expect_ad(fi3, arg); + } +} diff --git a/test/unit/math/mix/functor/reduce_sum_test.cpp b/test/unit/math/mix/functor/reduce_sum_test.cpp new file mode 100644 index 00000000000..ed520821d33 --- /dev/null +++ b/test/unit/math/mix/functor/reduce_sum_test.cpp @@ -0,0 +1,409 @@ +#include +#include + +#include +#include + +std::ostream* msgs = nullptr; + +template * = nullptr> +T sum_(T arg) { + return arg; +} + +template * = nullptr> +auto sum_(EigMat&& arg) { + return stan::math::sum(arg); +} + +template * = nullptr> +auto sum_(Vec&& arg) { + stan::scalar_type_t sum = 0; + for (size_t i = 0; i < arg.size(); ++i) { + sum += sum_(arg[i]); + } + return sum; +} + +struct sum_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T&& sub_slice, + std::ostream* msgs, Args&&... args) const { + using return_type = stan::return_type_t; + + return sum_(sub_slice) + + sub_slice.size() + * stan::math::sum(std::vector{ + return_type(sum_(std::forward(args)))...}); + } +}; + +TEST(MathMix_reduce_sum, grainsize) { + auto f1 = [](auto&& data) { + return stan::math::reduce_sum(data, 0, msgs); + }; + auto f2 = [](auto&& data) { + return stan::math::reduce_sum(data, -1, msgs); + }; + auto f3 = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + auto f4 = [](auto&& data) { + return stan::math::reduce_sum(data, 20, msgs); + }; + + std::vector data(5, 10.0); + + stan::test::expect_ad(f1, data); + stan::test::expect_ad(f2, data); + stan::test::expect_ad(f3, data); + stan::test::expect_ad(f4, data); +} + +TEST(MathMix_reduce_sum, std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector data(5, 10.0); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector> data(3, std::vector(2, 10.0)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector data(3, Eigen::VectorXd::Ones(2)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_row_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector data(3, Eigen::RowVectorXd::Ones(2)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_matrix_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector data(3, Eigen::MatrixXd::Ones(2, 4)); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_std_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector>> data( + 3, std::vector>(2, std::vector(2, 10.0))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::VectorXd::Ones(2))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_row_vector_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + stan::test::expect_ad(f, data); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_matrix_double_slice) { + auto f = [](auto&& data) { + return stan::math::reduce_sum(data, 1, msgs); + }; + + std::vector> data( + 3, std::vector(2, Eigen::MatrixXd::Ones(2, 4))); + + stan::test::expect_ad(f, data); +} + +struct start_end_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T1&&, + std::ostream* msgs, T2&& data) const { + stan::return_type_t sum = 0; + EXPECT_GE(start, 0); + EXPECT_LE(end, data.size() - 1); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMath_reduce_sum, start_end_slice) { + auto start_end = [](auto&& arg) { + return stan::math::reduce_sum(arg, 1, msgs, arg); + }; + + std::vector data(5, 1.0); + + stan::test::expect_ad(start_end, data); +} + +auto fi = [](auto&&... args) { + return stan::math::reduce_sum(std::vector(2, 10.0), 1, msgs, + args...); +}; + +auto fd = [](auto&& data, auto&&... args) { + return stan::math::reduce_sum(data, 1, msgs, args...); +}; + +TEST(MathMix_reduce_sum, int_arg) { + std::vector data(2, 1.0); + int arg = 5.0; + + stan::test::expect_ad([&](auto&& data) { return fd(data, arg); }, data); +} + +TEST(MathMix_reduce_sum, double_arg) { + std::vector data(2, 10.0); + double arg = 5.0; + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_int_arg) { + std::vector data(2, 10.0); + std::vector arg(2, 10); + + stan::test::expect_ad([&](auto&& data) { return fd(data, arg); }, data); +} + +TEST(MathMix_reduce_sum, std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector arg(2, 10.0); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, eigen_vector_arg) { + std::vector data(2, 10.0); + Eigen::VectorXd arg = Eigen::VectorXd::Ones(2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, eigen_row_vector_arg) { + std::vector data(2, 10.0); + Eigen::RowVectorXd arg = Eigen::RowVectorXd::Ones(2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, eigen_matrix_arg) { + std::vector data(2, 10.0); + Eigen::MatrixXd arg = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector> arg(2, std::vector(2, 10.0)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_vector_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::VectorXd::Ones(2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_row_vector_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::RowVectorXd::Ones(2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_eigen_matrix_arg) { + std::vector data(2, 10.0); + std::vector arg(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_std_vector_double_arg) { + std::vector data(2, 10.0); + std::vector>> arg( + 2, std::vector>(2, std::vector(2, 10.0))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_vector_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::VectorXd::Ones(2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_row_vector_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, std_vector_std_vector_eigen_matrix_arg) { + std::vector data(2, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::MatrixXd::Ones(2, 2))); + + stan::test::expect_ad(fi, arg); + stan::test::expect_ad(fd, data, arg); +} + +TEST(MathMix_reduce_sum, eigen_three_args1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad(fi, arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_ints1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_ints2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_ints3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(1, arg1, std::vector{1, 2, 3}, arg2, 3, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_doubles1) { + Eigen::VectorXd arg1 = Eigen::VectorXd::Ones(2); + Eigen::RowVectorXd arg2 = Eigen::RowVectorXd::Ones(2); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_doubles2) { + double arg1 = 1.0; + std::vector arg2(2, 1.0); + Eigen::MatrixXd arg3 = Eigen::MatrixXd::Ones(2, 2); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} + +TEST(MathMix_reduce_sum, eigen_three_args_with_doubles3) { + double arg1 = 1.0; + std::vector> arg2(2, std::vector(2, 1.0)); + std::vector arg3(2, Eigen::MatrixXd::Ones(2, 2)); + + stan::test::expect_ad( + [&](auto&& arg1, auto&& arg2, auto&& arg3) { + return fi(std::vector{1.0, 2.0, 3.0}, arg1, 3.0, arg2, + std::vector{1.0, 2.0, 3.0}, arg3); + }, + arg1, arg2, arg3); +} diff --git a/test/unit/math/prim/functor/reduce_sum_static_test.cpp b/test/unit/math/prim/functor/reduce_sum_static_test.cpp new file mode 100644 index 00000000000..8c6ee821d54 --- /dev/null +++ b/test/unit/math/prim/functor/reduce_sum_static_test.cpp @@ -0,0 +1,476 @@ +#include +#include +#include +#include +#include +#include +#include + +std::ostream* msgs = nullptr; + +// reduce functor which is the BinaryFunction +// here we use iterators which represent integer indices +template +struct count_lpdf { + count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::poisson_lpmf(sub_slice, lambda[0]); + } +}; + +TEST(StanMathPrim_reduce_sum_static, value) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + std::vector idata; + std::vector vlambda_d(1, lambda_d); + + double poisson_lpdf = stan::math::reduce_sum_static>( + data, 5, msgs, vlambda_d, idata); + + double poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_d); + + EXPECT_FLOAT_EQ(poisson_lpdf, poisson_lpdf_ref); +} + +// ******************************** +// test if nested parallelism works +// ******************************** + +template +struct nesting_count_lpdf { + nesting_count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::reduce_sum_static>(sub_slice, 5, msgs, + lambda, idata); + } +}; + +TEST(StanMathPrim_reduce_sum_static, nesting_value) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + std::vector idata; + std::vector vlambda_d(1, lambda_d); + + double poisson_lpdf = stan::math::reduce_sum_static>( + data, 5, msgs, vlambda_d, idata); + + double poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_d); + + EXPECT_FLOAT_EQ(poisson_lpdf, poisson_lpdf_ref); +} + +template * = nullptr> +T sum_(T arg) { + return arg; +} + +template * = nullptr> +auto sum_(EigMat&& arg) { + return stan::math::sum(arg); +} + +template * = nullptr> +auto sum_(Vec&& arg) { + stan::scalar_type_t sum = 0; + for (size_t i = 0; i < arg.size(); ++i) { + sum += sum_(arg[i]); + } + return sum; +} + +struct sum_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T&& sub_slice, + std::ostream* msgs, Args&&... args) const { + using return_type = stan::return_type_t; + + return sum_(sub_slice) + + sub_slice.size() + * stan::math::sum(std::vector{ + return_type(sum_(std::forward(args)))...}); + } +}; + +TEST(StanMathPrim_reduce_sum_static, grainsize) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_THROW(stan::math::reduce_sum_static(data, 0, msgs), + std::domain_error); + + EXPECT_THROW(stan::math::reduce_sum_static(data, -1, msgs), + std::domain_error); + + EXPECT_NO_THROW(stan::math::reduce_sum_static(data, 1, msgs)); + + EXPECT_NO_THROW( + stan::math::reduce_sum_static(data, 3 * data.size(), msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_EQ(50, stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + + EXPECT_DOUBLE_EQ(50.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data(5, std::vector(2, 10.0)); + + EXPECT_DOUBLE_EQ(100.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data(5, std::vector(2, 10)); + + EXPECT_EQ(100, stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::VectorXd::Ones(2)); + + EXPECT_DOUBLE_EQ(10.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_row_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::RowVectorXd::Ones(2)); + + EXPECT_DOUBLE_EQ(10.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_matrix_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::MatrixXd::Ones(2, 2)); + + EXPECT_DOUBLE_EQ(20.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, + std_vector_std_vector_std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector>> data( + 5, std::vector>(2, std::vector(2, 10.0))); + + EXPECT_DOUBLE_EQ(200, stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, + std_vector_std_vector_std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector>> data( + 5, std::vector>(2, std::vector(2, 10.0))); + + EXPECT_DOUBLE_EQ(200.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_eigen_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::VectorXd::Ones(2))); + + EXPECT_DOUBLE_EQ(20.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, + std_vector_std_vector_eigen_row_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + EXPECT_DOUBLE_EQ(20.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_eigen_matrix_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::MatrixXd::Ones(2, 2))); + + EXPECT_DOUBLE_EQ(40.0, + stan::math::reduce_sum_static(data, 1, msgs)); +} + +struct start_end_lpdf { + inline auto operator()(std::size_t start, std::size_t end, + const std::vector&, std::ostream* msgs, + const std::vector& data) const { + int sum = 0; + EXPECT_GE(start, 0); + EXPECT_LE(end, data.size() - 1); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMathPrim_reduce_sum_static, start_end_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_EQ(50, + stan::math::reduce_sum_static(data, 1, msgs, data)); +} + +TEST(StanMathPrim_reduce_sum_static, int_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + int arg = 5; + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + double arg = 5.0; + + EXPECT_DOUBLE_EQ(5 * (10.0 + 5.0), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_int_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(5, 10); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 10), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(5, 10.0); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 10), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::VectorXd arg = Eigen::VectorXd::Ones(5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::RowVectorXd arg = Eigen::RowVectorXd::Ones(5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::MatrixXd arg = Eigen::MatrixXd::Ones(5, 5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 5), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg(5, std::vector(5, 10.0)); + + EXPECT_DOUBLE_EQ(5 * (10 + 250), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::VectorXd::Ones(5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 10), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::RowVectorXd::Ones(5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 10), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::MatrixXd::Ones(5, 5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 50), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, + std_vector_std_vector_std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector>> arg( + 5, std::vector>(5, std::vector(5, 10.0))); + + EXPECT_DOUBLE_EQ(5 * (10 + 1250), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::VectorXd::Ones(5))); + + EXPECT_DOUBLE_EQ(5 * (10 + 20), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, + std_vector_std_vector_eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::RowVectorXd::Ones(5))); + + EXPECT_DOUBLE_EQ(5 * (10 + 20), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, std_vector_std_vector_eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::MatrixXd::Ones(5, 3))); + + EXPECT_DOUBLE_EQ(5 * (10 + 60), + stan::math::reduce_sum_static(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum_static, sum) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 1.0); + int arg1 = 1; + double arg2 = 1.0; + std::vector arg3(5, 1); + std::vector arg4(5, 1.0); + Eigen::VectorXd arg5 = Eigen::VectorXd::Ones(5); + Eigen::RowVectorXd arg6 = Eigen::RowVectorXd::Ones(5); + Eigen::MatrixXd arg7 = Eigen::MatrixXd::Ones(5, 5); + std::vector> arg8(2, arg4); + std::vector arg9(2, arg5); + + EXPECT_DOUBLE_EQ( + 5 + 5 * (1 + 1 + 5 + 5 + 5 + 5 + 25 + 10 + 10), + stan::math::reduce_sum_static( + data, 1, msgs, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9)); +} + +template +struct static_check_lpdf { + inline auto operator()(std::size_t start, std::size_t end, + const std::vector&, std::ostream* msgs, + const std::vector& data) const { + int sum = 0; + EXPECT_LE(end - start + 1, grainsize); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMathPrim_reduce_sum_static, static_check) { + stan::math::init_threadpool_tbb(); + + for (auto size : {10, 16, 19, 31}) { + std::vector data(size, 10); + + EXPECT_NO_THROW(stan::math::reduce_sum_static>( + data, 1, msgs, data)); + EXPECT_NO_THROW(stan::math::reduce_sum_static>( + data, 2, msgs, data)); + EXPECT_NO_THROW(stan::math::reduce_sum_static>( + data, 3, msgs, data)); + } +} diff --git a/test/unit/math/prim/functor/reduce_sum_test.cpp b/test/unit/math/prim/functor/reduce_sum_test.cpp new file mode 100644 index 00000000000..fe4473f3e7d --- /dev/null +++ b/test/unit/math/prim/functor/reduce_sum_test.cpp @@ -0,0 +1,432 @@ +#include +#include +#include +#include +#include +#include +#include + +std::ostream* msgs = nullptr; + +// reduce functor which is the BinaryFunction +// here we use iterators which represent integer indices +template +struct count_lpdf { + count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::poisson_lpmf(sub_slice, lambda[0]); + } +}; + +TEST(StanMathPrim_reduce_sum, value) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + std::vector idata; + std::vector vlambda_d(1, lambda_d); + + double poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_d, idata); + + double poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_d); + + EXPECT_FLOAT_EQ(poisson_lpdf, poisson_lpdf_ref); +} + +// ******************************** +// test if nested parallelism works +// ******************************** + +template +struct nesting_count_lpdf { + nesting_count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::reduce_sum>(sub_slice, 5, msgs, lambda, + idata); + } +}; + +TEST(StanMathPrim_reduce_sum, nesting_value) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + std::vector idata; + std::vector vlambda_d(1, lambda_d); + + double poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_d, idata); + + double poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_d); + + EXPECT_FLOAT_EQ(poisson_lpdf, poisson_lpdf_ref); +} + +template * = nullptr> +T sum_(T arg) { + return arg; +} + +template * = nullptr> +auto sum_(EigMat&& arg) { + return stan::math::sum(arg); +} + +template * = nullptr> +auto sum_(Vec&& arg) { + stan::scalar_type_t sum = 0; + for (size_t i = 0; i < arg.size(); ++i) { + sum += sum_(arg[i]); + } + return sum; +} + +struct sum_lpdf { + template + inline auto operator()(std::size_t start, std::size_t end, T&& sub_slice, + std::ostream* msgs, Args&&... args) const { + using return_type = stan::return_type_t; + + return sum_(sub_slice) + + sub_slice.size() + * stan::math::sum(std::vector{ + return_type(sum_(std::forward(args)))...}); + } +}; + +TEST(StanMathPrim_reduce_sum, grainsize) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_THROW(stan::math::reduce_sum(data, 0, msgs), + std::domain_error); + + EXPECT_THROW(stan::math::reduce_sum(data, -1, msgs), + std::domain_error); + + EXPECT_NO_THROW(stan::math::reduce_sum(data, 1, msgs)); + + EXPECT_NO_THROW( + stan::math::reduce_sum(data, 3 * data.size(), msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_EQ(50, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + + EXPECT_DOUBLE_EQ(50.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data(5, std::vector(2, 10.0)); + + EXPECT_DOUBLE_EQ(100.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data(5, std::vector(2, 10)); + + EXPECT_EQ(100, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::VectorXd::Ones(2)); + + EXPECT_DOUBLE_EQ(10.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_row_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::RowVectorXd::Ones(2)); + + EXPECT_DOUBLE_EQ(10.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_matrix_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, Eigen::MatrixXd::Ones(2, 2)); + + EXPECT_DOUBLE_EQ(20.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_std_vector_int_slice) { + stan::math::init_threadpool_tbb(); + + std::vector>> data( + 5, std::vector>(2, std::vector(2, 10.0))); + + EXPECT_DOUBLE_EQ(200, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_std_vector_double_slice) { + stan::math::init_threadpool_tbb(); + + std::vector>> data( + 5, std::vector>(2, std::vector(2, 10.0))); + + EXPECT_DOUBLE_EQ(200.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::VectorXd::Ones(2))); + + EXPECT_DOUBLE_EQ(20.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_row_vector_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::RowVectorXd::Ones(2))); + + EXPECT_DOUBLE_EQ(20.0, stan::math::reduce_sum(data, 1, msgs)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_matrix_slice) { + stan::math::init_threadpool_tbb(); + + std::vector> data( + 5, std::vector(2, Eigen::MatrixXd::Ones(2, 2))); + + EXPECT_DOUBLE_EQ(40.0, stan::math::reduce_sum(data, 1, msgs)); +} + +struct start_end_lpdf { + inline auto operator()(std::size_t start, std::size_t end, + const std::vector&, std::ostream* msgs, + const std::vector& data) const { + int sum = 0; + EXPECT_GE(start, 0); + EXPECT_LE(end, data.size() - 1); + for (size_t i = start; i <= end; i++) { + sum += data[i]; + } + return sum; + } +}; + +TEST(StanMathPrim_reduce_sum, start_end_slice) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10); + + EXPECT_EQ(50, stan::math::reduce_sum(data, 1, msgs, data)); +} + +TEST(StanMathPrim_reduce_sum, int_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + int arg = 5; + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + double arg = 5.0; + + EXPECT_DOUBLE_EQ(5 * (10.0 + 5.0), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_int_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(5, 10); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 10), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(5, 10.0); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 10), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::VectorXd arg = Eigen::VectorXd::Ones(5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::RowVectorXd arg = Eigen::RowVectorXd::Ones(5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + Eigen::MatrixXd arg = Eigen::MatrixXd::Ones(5, 5); + + EXPECT_DOUBLE_EQ(5 * (10 + 5 * 5), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg(5, std::vector(5, 10.0)); + + EXPECT_DOUBLE_EQ(5 * (10 + 250), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::VectorXd::Ones(5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 10), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::RowVectorXd::Ones(5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 10), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector arg(2, Eigen::MatrixXd::Ones(5, 5)); + + EXPECT_DOUBLE_EQ(5 * (10 + 50), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_std_vector_double_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector>> arg( + 5, std::vector>(5, std::vector(5, 10.0))); + + EXPECT_DOUBLE_EQ(5 * (10 + 1250), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::VectorXd::Ones(5))); + + EXPECT_DOUBLE_EQ(5 * (10 + 20), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_row_vector_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::RowVectorXd::Ones(5))); + + EXPECT_DOUBLE_EQ(5 * (10 + 20), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, std_vector_std_vector_eigen_matrix_arg) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 10.0); + std::vector> arg( + 2, std::vector(2, Eigen::MatrixXd::Ones(5, 3))); + + EXPECT_DOUBLE_EQ(5 * (10 + 60), + stan::math::reduce_sum(data, 1, msgs, arg)); +} + +TEST(StanMathPrim_reduce_sum, sum) { + stan::math::init_threadpool_tbb(); + + std::vector data(5, 1.0); + int arg1 = 1; + double arg2 = 1.0; + std::vector arg3(5, 1); + std::vector arg4(5, 1.0); + Eigen::VectorXd arg5 = Eigen::VectorXd::Ones(5); + Eigen::RowVectorXd arg6 = Eigen::RowVectorXd::Ones(5); + Eigen::MatrixXd arg7 = Eigen::MatrixXd::Ones(5, 5); + std::vector> arg8(2, arg4); + std::vector arg9(2, arg5); + + EXPECT_DOUBLE_EQ( + 5 + 5 * (1 + 1 + 5 + 5 + 5 + 5 + 25 + 10 + 10), + stan::math::reduce_sum(data, 1, msgs, arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8, arg9)); +} diff --git a/test/unit/math/rev/functor/reduce_sum_test.cpp b/test/unit/math/rev/functor/reduce_sum_test.cpp new file mode 100644 index 00000000000..810f4b2d52c --- /dev/null +++ b/test/unit/math/rev/functor/reduce_sum_test.cpp @@ -0,0 +1,397 @@ +#include +#include +#include +#include +#include +#include +#include + +std::ostream* msgs = nullptr; + +// reduce functor which is the BinaryFunction +// here we use iterators which represent integer indices +template +struct count_lpdf { + count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::poisson_lpmf(sub_slice, lambda[0]); + } +}; + +TEST(StanMathRev_reduce_sum, value) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + std::vector idata; + std::vector vlambda_d(1, lambda_d); + + double poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_d, idata); + + double poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_d); + + EXPECT_FLOAT_EQ(poisson_lpdf, poisson_lpdf_ref) + << "ref value of poisson lpdf : " << poisson_lpdf_ref << std::endl + << "value of poisson lpdf : " << poisson_lpdf << std::endl; +} + +TEST(StanMathRev_reduce_sum, gradient) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + using stan::math::var; + + var lambda_v = lambda_d; + + std::vector idata; + std::vector vlambda_v(1, lambda_v); + + var poisson_lpdf = stan::math::reduce_sum>(data, 5, msgs, + vlambda_v, idata); + + var lambda_ref = lambda_d; + var poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_ref); + + EXPECT_FLOAT_EQ(value_of(poisson_lpdf), value_of(poisson_lpdf_ref)); + + stan::math::grad(poisson_lpdf_ref.vi_); + const double lambda_ref_adj = lambda_ref.adj(); + + stan::math::set_zero_all_adjoints(); + stan::math::grad(poisson_lpdf.vi_); + const double lambda_adj = lambda_v.adj(); + + EXPECT_FLOAT_EQ(lambda_adj, lambda_ref_adj) + << "ref value of poisson lpdf : " << poisson_lpdf_ref.val() << std::endl + << "ref gradient wrt to lambda: " << lambda_ref_adj << std::endl + << "value of poisson lpdf : " << poisson_lpdf.val() << std::endl + << "gradient wrt to lambda: " << lambda_adj << std::endl; + + stan::math::recover_memory(); +} +TEST(StanMathRev_reduce_sum, grainsize) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + using stan::math::var; + + var lambda_v = lambda_d; + + std::vector idata; + std::vector vlambda_v(1, lambda_v); + + EXPECT_THROW( + stan::math::reduce_sum>(data, 0, msgs, vlambda_v, idata), + std::domain_error); + + EXPECT_THROW( + stan::math::reduce_sum>(data, -1, msgs, vlambda_v, idata), + std::domain_error); + + EXPECT_NO_THROW( + stan::math::reduce_sum>(data, 1, msgs, vlambda_v, idata)); + + EXPECT_NO_THROW(stan::math::reduce_sum>(data, 2 * elems, msgs, + vlambda_v, idata)); + + stan::math::recover_memory(); +} + +// ******************************** +// test if nested parallelism works +// ******************************** +template +struct nesting_count_lpdf { + nesting_count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& sub_slice, std::ostream* msgs, + const std::vector& lambda, + const std::vector& idata) const { + return stan::math::reduce_sum>(sub_slice, 5, msgs, lambda, + idata); + } +}; + +TEST(StanMathRev_reduce_sum, nesting_gradient) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t elems = 10000; + const std::size_t num_iter = 1000; + std::vector data(elems); + + for (std::size_t i = 0; i != elems; ++i) + data[i] = i; + + using stan::math::var; + + var lambda_v = lambda_d; + + std::vector idata; + std::vector vlambda_v(1, lambda_v); + + var poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_v, idata); + + var lambda_ref = lambda_d; + var poisson_lpdf_ref = stan::math::poisson_lpmf(data, lambda_ref); + + EXPECT_FLOAT_EQ(value_of(poisson_lpdf), value_of(poisson_lpdf_ref)); + + stan::math::grad(poisson_lpdf_ref.vi_); + const double lambda_ref_adj = lambda_ref.adj(); + + stan::math::set_zero_all_adjoints(); + stan::math::grad(poisson_lpdf.vi_); + const double lambda_adj = lambda_v.adj(); + + EXPECT_FLOAT_EQ(lambda_adj, lambda_ref_adj) + << "ref value of poisson lpdf : " << poisson_lpdf_ref.val() << std::endl + << "ref gradient wrt to lambda: " << lambda_ref_adj << std::endl + << "value of poisson lpdf : " << poisson_lpdf.val() << std::endl + << "gradient wrt to lambda: " << lambda_adj << std::endl; + + stan::math::recover_memory(); +} + +// ******************************** +// basic performance test for a hierarchical model +// ******************************** + +template +struct grouped_count_lpdf { + grouped_count_lpdf() {} + + // does the reduction in the sub-slice start to end + template + inline T operator()(std::size_t start, std::size_t end, VecInt1&& sub_slice, + std::ostream* msgs, VecT&& lambda, VecInt2&& gidx) const { + const std::size_t num_terms = end - start + 1; + // std::cout << "sub-slice " << start << " - " << end << "; num_terms = " << + // num_terms << "; size = " << sub_slice.size() << std::endl; + std::decay_t lambda_slice(num_terms); + for (std::size_t i = 0; i != num_terms; ++i) + lambda_slice[i] = lambda[gidx[start + i]]; + + return stan::math::poisson_lpmf(sub_slice, lambda_slice); + } +}; + +TEST(StanMathRev_reduce_sum, grouped_gradient) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t groups = 10; + const std::size_t elems_per_group = 1000; + const std::size_t elems = groups * elems_per_group; + const std::size_t num_iter = 1000; + + std::vector data(elems); + std::vector gidx(elems); + + for (std::size_t i = 0; i != elems; ++i) { + data[i] = i; + gidx[i] = i / elems_per_group; + } + + using stan::math::var; + + std::vector vlambda_v; + + for (std::size_t i = 0; i != groups; ++i) + vlambda_v.push_back(i + 0.2); + + var lambda_v = vlambda_v[0]; + + var poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_v, gidx); + + std::vector vref_lambda_v; + for (std::size_t i = 0; i != elems; ++i) { + vref_lambda_v.push_back(vlambda_v[gidx[i]]); + } + var lambda_ref = vlambda_v[0]; + var poisson_lpdf_ref = stan::math::poisson_lpmf(data, vref_lambda_v); + + EXPECT_FLOAT_EQ(value_of(poisson_lpdf), value_of(poisson_lpdf_ref)); + + stan::math::grad(poisson_lpdf_ref.vi_); + const double lambda_ref_adj = lambda_ref.adj(); + + stan::math::set_zero_all_adjoints(); + stan::math::grad(poisson_lpdf.vi_); + const double lambda_adj = lambda_v.adj(); + + EXPECT_FLOAT_EQ(lambda_adj, lambda_ref_adj) + << "ref value of poisson lpdf : " << poisson_lpdf_ref.val() << std::endl + << "ref gradient wrt to lambda: " << lambda_ref_adj << std::endl + << "value of poisson lpdf : " << poisson_lpdf.val() << std::endl + << "gradient wrt to lambda: " << lambda_adj << std::endl; + + stan::math::recover_memory(); +} + +TEST(StanMathRev_reduce_sum, grouped_gradient_eigen) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t groups = 10; + const std::size_t elems_per_group = 1000; + const std::size_t elems = groups * elems_per_group; + const std::size_t num_iter = 1000; + + std::vector data(elems); + std::vector gidx(elems); + + for (std::size_t i = 0; i != elems; ++i) { + data[i] = i; + gidx[i] = i / elems_per_group; + } + using stan::math::var; + + Eigen::Matrix vlambda_v(groups); + + for (std::size_t i = 0; i != groups; ++i) + vlambda_v[i] = i + 0.2; + var lambda_v = vlambda_v[0]; + + var poisson_lpdf = stan::math::reduce_sum>( + data, 5, msgs, vlambda_v, gidx); + + std::vector vref_lambda_v; + for (std::size_t i = 0; i != elems; ++i) { + vref_lambda_v.push_back(vlambda_v[gidx[i]]); + } + var lambda_ref = vlambda_v[0]; + + var poisson_lpdf_ref = stan::math::poisson_lpmf(data, vref_lambda_v); + + EXPECT_FLOAT_EQ(value_of(poisson_lpdf), value_of(poisson_lpdf_ref)); + + stan::math::grad(poisson_lpdf_ref.vi_); + const double lambda_ref_adj = lambda_ref.adj(); + + stan::math::set_zero_all_adjoints(); + stan::math::grad(poisson_lpdf.vi_); + const double lambda_adj = lambda_v.adj(); + + EXPECT_FLOAT_EQ(lambda_adj, lambda_ref_adj) + << "ref value of poisson lpdf : " << poisson_lpdf_ref.val() << std::endl + << "ref gradient wrt to lambda: " << lambda_ref_adj << std::endl + << "value of poisson lpdf : " << poisson_lpdf.val() << std::endl + << "gradient wrt to lambda: " << lambda_adj << std::endl; + + stan::math::recover_memory(); +} + +// ******************************** +// slice over the grouping variable which is a var +// ******************************** + +template +struct slice_group_count_lpdf { + slice_group_count_lpdf() {} + + // does the reduction in the sub-slice start to end + inline T operator()(std::size_t start, std::size_t end, + const std::vector& lambda_slice, std::ostream* msgs, + const std::vector& y, + const std::vector& gsidx) const { + const std::size_t num_groups = end - start + 1; + T result = 0.0; + for (std::size_t i = 0; i != num_groups; ++i) { + std::vector y_group(y.begin() + gsidx[start + i], + y.begin() + gsidx[start + i + 1]); + result += stan::math::poisson_lpmf(y_group, lambda_slice[i]); + } + return result; + } +}; + +TEST(StanMathRev_reduce_sum, slice_group_gradient) { + stan::math::init_threadpool_tbb(); + + double lambda_d = 10.0; + const std::size_t groups = 10; + const std::size_t elems_per_group = 1000; + const std::size_t elems = groups * elems_per_group; + const std::size_t num_iter = 1000; + + std::vector data(elems); + std::vector gidx(elems); + std::vector gsidx(groups + 1); + + for (std::size_t i = 0, k = 0; i != groups; ++i) { + gsidx[i] = k; + for (std::size_t j = 0; j != elems_per_group; ++j, ++k) { + data[k] = k; + gidx[k] = i; + } + gsidx[i + 1] = k; + } + + using stan::math::var; + + std::vector vlambda_v; + + for (std::size_t i = 0; i != groups; ++i) + vlambda_v.push_back(i + 0.2); + + var lambda_v = vlambda_v[0]; + + var poisson_lpdf = stan::math::reduce_sum>( + vlambda_v, 5, msgs, data, gsidx); + + std::vector vref_lambda_v; + for (std::size_t i = 0; i != elems; ++i) { + vref_lambda_v.push_back(vlambda_v[gidx[i]]); + } + var lambda_ref = vlambda_v[0]; + + var poisson_lpdf_ref = stan::math::poisson_lpmf(data, vref_lambda_v); + + EXPECT_FLOAT_EQ(value_of(poisson_lpdf), value_of(poisson_lpdf_ref)); + + stan::math::grad(poisson_lpdf_ref.vi_); + const double lambda_ref_adj = lambda_ref.adj(); + + stan::math::set_zero_all_adjoints(); + stan::math::grad(poisson_lpdf.vi_); + const double lambda_adj = lambda_v.adj(); + + EXPECT_FLOAT_EQ(lambda_adj, lambda_ref_adj) + << "ref value of poisson lpdf : " << poisson_lpdf_ref.val() << std::endl + << "ref gradient wrt to lambda: " << lambda_ref_adj << std::endl + << "value of poisson lpdf : " << poisson_lpdf.val() << std::endl + << "gradient wrt to lambda: " << lambda_adj << std::endl; + + stan::math::recover_memory(); +}