From 24d340075de9677464ee73c68c6b70182ce1a3cc Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Sun, 12 Jan 2020 23:14:11 -0500 Subject: [PATCH 01/10] Add openmp parallelization to repeated hafnian --- include/repeated_hafnian.hpp | 499 +++++++++++++++++++++-------------- 1 file changed, 301 insertions(+), 198 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 6fd960ac7..66e582fe2 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -16,203 +16,294 @@ * \rst * Contains functions for computing the hafnian using the algorithm * described in *From moments of sum to moments of product*, - * `doi:10.1016/j.jmva.2007.01.013 `__. - * \endrst + * `doi:10.1016/j.jmva.2007.01.013 + * `__. \endrst */ #pragma once #include namespace libwalrus { +/** + * Converts a linear index to a multi index + * e.g. if we wanted the multi-index (i,j) of an element in a 2x2 matrix given + * a linear index of 3 in the array storing the matrix, the maxes vector would + * be {1,1} and this function would return (1,0) + * + * @param linear_index the "flattened" index + * @param maxes a vector of integers, representing the max index value + * of each indice in the multi-index object. + * + * @return hafnian of the input matrix + */ + +std::vector lin_to_multi(int linear_index, const std::vector &maxes) { + std::vector i(maxes.size(), 0); + int l = 0; + int s0 = maxes[0] + 1; + i[l] = linear_index % s0; + int prod = s0; + int gl = linear_index; + + while (l < i.size() - 1) { + if (prod > linear_index) { + break; + } else { + int glp1 = (gl - i[l]) / (maxes[l] + 1); + i[l + 1] = glp1 % (maxes[l + 1] + 1); + prod *= (maxes[1 + 1] + 1); + gl = glp1; + } + l++; + } + return i; +} + +/** + * Returns the binomial coefficient N!/K!(N-K)! + * Adapted from http://blog.plover.com/math/choose.html + * + * @param N + * @param K + * + * @return N!/K!(N-K)! + */ + +template T get_binom_coeff(T N, T K) { + T r = 1; + T d; + if (K > N) + return 0; + for (d = 1; d <= K; d++) { + r *= N--; + r /= d; + } + return r; +} + /** * Returns the hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all + * ones. * @return hafnian of the input matrix */ template inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { - int n = std::sqrt(static_cast(mat.size())); - assert(static_cast(rpt.size()) == n); - long double p = 2; - T y = 0.0, q = 0.0; + int n = std::sqrt(static_cast(mat.size())); + assert(static_cast(rpt.size()) == n); + T y = 0.0; - std::vector x(n, 0.0); +#pragma omp parallel + { + T q = 0.0L; + + long double p = 2; + // std::vector x(n, 0.0); int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s2 = s / 2; for (int i = 1; i < s2; i++) { - p /= i + 1; - } - - std::vector nu2(n); - std::transform(rpt.begin(), rpt.end(), nu2.begin(), - std::bind(std::multiplies(), std::placeholders::_1, 0.5L)); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; - } + p /= i + 1; } unsigned long long int steps = 1; for (auto i : rpt) { - steps *= i + 1; + steps *= i + 1; } steps /= 2; - for (unsigned long long int i = 0; i < steps; i++) { - y += static_cast(p) * pow(q, s2); - - for (int j = 0; j < n; j++) { - - if (x[j] < rpt[j]) { - x[j] += 1; - p *= -static_cast(rpt[j] + 1 - x[j]) / x[j]; - - for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * mat[j * n + j]; - break; - } - else { - x[j] = 0; - if (rpt[j] % 2 == 1) { - p *= -1; - } - for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; - } + int chunks = omp_get_num_threads(); + int id = omp_get_thread_num(); + int chunk_size = steps / chunks; + int beg = id * chunk_size; + int end = (id == chunks - 1) ? steps : beg + chunk_size; + T sum_chunk = 0.L; + + std::vector x = lin_to_multi(beg, rpt); + std::vector nu2(n); + for (int i = 0; i < n; i++) + nu2[i] = 0.5 * rpt[i] - x[i]; + + int x_sum = std::accumulate(x.begin(), x.end(), 0); + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); + } + p *= (x_sum % 2 == 0) ? 1 : -1; + + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; + } + } + + for (unsigned long long int i = beg; i < end; i++) { + sum_chunk += static_cast(p) * pow(q, s2); + + for (int j = 0; j < n; j++) { + + if (x[j] < rpt[j]) { + x[j] += 1; + p *= -static_cast(rpt[j] + 1 - x[j]) / x[j]; + + for (int k = 0; k < n; k++) { + q -= mat[k * n + j] * (0.5L * rpt[k] - x[k]); + } + q -= 0.5L * mat[j * n + j]; + break; + } else { + x[j] = 0; + if (rpt[j] % 2 == 1) { + p *= -1; + } + for (int k = 0; k < n; k++) { + q += (1.0L * rpt[j]) * mat[k * n + j] * (0.5L * rpt[k] - x[k]); + } + q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; } + } } +#pragma omp critical + y += sum_chunk; + } - return y; + return y; } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of + * means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all + * ones. * @return loop hafnian of the input matrix */ template -inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, std::vector &rpt) { - int n = std::sqrt(static_cast(mat.size())); - assert(static_cast(rpt.size()) == n); - +inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, + std::vector &rpt) { + int n = std::sqrt(static_cast(mat.size())); + assert(static_cast(rpt.size()) == n); + + T y = 0.0L; + unsigned long long int steps = 1; + for (auto i : rpt) { + steps *= i + 1; + } + steps /= 2; + +#pragma omp parallel + { long long int p = 2; - T y = 0.0L, q = 0.0L, q1 = 0.0L; + T q = 0.0L, q1 = 0.0L; + + int chunks = omp_get_num_threads(); + int id = omp_get_thread_num(); + int chunk_size = steps / chunks; + int beg = id * chunk_size; + int end = (id == chunks - 1) ? steps : beg + chunk_size; + T sum_chunk = 0.L; - std::vector x(n, 0.0); + std::vector x = lin_to_multi(beg, rpt); int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s1 = std::floor(0.5 * s) + 1; std::vector z1(s1, 1.0L); std::vector z2(s1, 1.0L); - // diagonal of matrix mat - // std::vector mu(n); - // for (int i=0; i nu2(n); - std::transform(rpt.begin(), rpt.end(), nu2.begin(), - std::bind(std::multiplies(), std::placeholders::_1, 0.5L)); + for (int i = 0; i < n; i++) + nu2[i] = 0.5 * rpt[i] - x[i]; for (int i = 0; i < n; i++) { - q1 += nu2[i] * mu[i]; - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; - } + q1 += nu2[i] * mu[i]; + for (int j = 0; j < n; j++) { + q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; + } } - unsigned long long int steps = 1; - - for (auto i : rpt) { - steps *= i + 1; + int x_sum = std::accumulate(x.begin(), x.end(), 0); + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); } + p *= (x_sum % 2 == 0) ? 1 : -1; - steps /= 2; + for (unsigned long long int i = beg; i < end; i++) { + for (int j = 1; j < s1; j++) { + z1[j] = z1[j - 1] * q / (1.0L * j); + } - for (unsigned long long int i = 0; i < steps; i++) { + if (s % 2 == 1) { + z2[0] = q1; for (int j = 1; j < s1; j++) { - z1[j] = z1[j - 1] * q / (1.0L * j); - } - - if (s % 2 == 1) { - z2[0] = q1; - for (int j = 1; j < s1; j++) { - z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j + 1); - } - } - else { - for (int j = 1; j < s1; j++) { - z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j - 1); - } + z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j + 1); } - - - T z1z2prod = 0.0; - for (int j = 0; j < s1; j++) { - z1z2prod += z1[j] * z2[s1 - 1 - j]; + } else { + for (int j = 1; j < s1; j++) { + z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j - 1); } - - y += static_cast(p) * z1z2prod; - - for (int j = 0; j < n; j++) { - - if (x[j] < rpt[j]) { - x[j] += 1; - p = -std::round(p * static_cast(rpt[j] + 1 - x[j]) / x[j]); - - for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * mat[j * n + j]; - q1 -= mu[j]; - break; - } - else { - x[j] = 0; - if (rpt[j] % 2 == 1) { - p *= -1; - } - for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; - q1 += static_cast(rpt[j]) * mu[j]; - } + } + + T z1z2prod = 0.0; + for (int j = 0; j < s1; j++) { + z1z2prod += z1[j] * z2[s1 - 1 - j]; + } + + sum_chunk += static_cast(p) * z1z2prod; + + for (int j = 0; j < n; j++) { + if (x[j] < rpt[j]) { + x[j] += 1; + p = -std::round(p * static_cast(rpt[j] + 1 - x[j]) / + x[j]); + + for (int k = 0; k < n; k++) { + q -= mat[k * n + j] * (0.5L * rpt[k] - x[k]); + } + q -= 0.5L * mat[j * n + j]; + q1 -= mu[j]; + break; + } else { + x[j] = 0; + if (rpt[j] % 2 == 1) { + p *= -1; + } + for (int k = 0; k < n; k++) { + q += (1.0L * rpt[j]) * mat[k * n + j] * (0.5L * rpt[k] - x[k]); + } + q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; + q1 += static_cast(rpt[j]) * mu[j]; } + } } - return y; +#pragma omp critical + y += sum_chunk; + } + + return y; } /** @@ -220,11 +311,12 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, std::vector). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for Python - * integration. It accepts and returns complex double numeric types, and + * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for + * Python integration. It accepts and returns complex double numeric types, and * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices @@ -235,35 +327,37 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, std::vector hafnian_rpt_quad(std::vector> &mat, std::vector &rpt) { - std::vector> matq(mat.begin(), mat.end()); - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - std::complex haf; - - if ( s == 0 || n == 0) - haf = std::complex(1.0, 0.0); - else - haf = hafnian_rpt(matq, rpt); - - return static_cast>(haf); +std::complex hafnian_rpt_quad(std::vector> &mat, + std::vector &rpt) { + std::vector> matq(mat.begin(), mat.end()); + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + std::complex haf; + + if (s == 0 || n == 0) + haf = std::complex(1.0, 0.0); + else + haf = hafnian_rpt(matq, rpt); + + return static_cast>(haf); } - /** * Returns the hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for Python - * integration. It accepts and returns double numeric types, and - * returns sensible values for empty and non-even matrices. + * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for + * Python integration. It accepts and returns double numeric types, and returns + * sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices * to type `long double`, allowing for greater precision than supported @@ -273,35 +367,36 @@ std::complex hafnian_rpt_quad(std::vector> &mat, st * \f$n\times n\f$ row-ordered symmetric matrix. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all + * ones. * @return hafnian of the input matrix */ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { - std::vector matq(mat.begin(), mat.end()); - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - long double haf; + std::vector matq(mat.begin(), mat.end()); + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + long double haf; - if ( s == 0 || n == 0) - haf = 1.0; - else - haf = hafnian_rpt(matq, rpt); + if (s == 0 || n == 0) + haf = 1.0; + else + haf = hafnian_rpt(matq, rpt); - return static_cast(haf); + return static_cast(haf); } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` for Python - * integration. It accepts and returns complex double numeric types, and - * returns sensible values for empty and non-even matrices. + * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` + * for Python integration. It accepts and returns complex double numeric types, + * and returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices * to type `complex`, allowing for greater precision than supported @@ -309,38 +404,43 @@ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of + * means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all + * ones. * @return loop hafnian of the input matrix */ -std::complex loop_hafnian_rpt_quad(std::vector> &mat, std::vector> &mu, std::vector &rpt) { - std::vector> matq(mat.begin(), mat.end()); - std::vector> muq(mu.begin(), mu.end()); - std::complex haf; - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - - if ( s == 0 || n == 0) - haf = std::complex(1.0, 0.0); - else - haf = loop_hafnian_rpt(matq, muq, rpt); - - return static_cast>(haf); +std::complex +loop_hafnian_rpt_quad(std::vector> &mat, + std::vector> &mu, + std::vector &rpt) { + std::vector> matq(mat.begin(), mat.end()); + std::vector> muq(mu.begin(), mu.end()); + std::complex haf; + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + + if (s == 0 || n == 0) + haf = std::complex(1.0, 0.0); + else + haf = loop_hafnian_rpt(matq, muq, rpt); + + return static_cast>(haf); } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be significantly more - * efficient in the cases where the matrix has repeated rows and columns. + * Note that this algorithm, while generally slower than others, can be + * significantly more efficient in the cases where the matrix has repeated rows + * and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` for Python - * integration. It accepts and returns double numeric types, and + * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` + * for Python integration. It accepts and returns double numeric types, and * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices @@ -349,25 +449,28 @@ std::complex loop_hafnian_rpt_quad(std::vector> &ma * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of + * means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all + * ones. * @return loop hafnian of the input matrix */ -double loop_hafnian_rpt_quad(std::vector &mat, std::vector &mu, std::vector &rpt) { - std::vector matq(mat.begin(), mat.end()); - std::vector muq(mu.begin(), mu.end()); - long double haf; - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - - if ( s == 0 || n == 0) - haf = 1.0; - else - haf = loop_hafnian_rpt(matq, muq, rpt); - - return static_cast(haf); +double loop_hafnian_rpt_quad(std::vector &mat, std::vector &mu, + std::vector &rpt) { + std::vector matq(mat.begin(), mat.end()); + std::vector muq(mu.begin(), mu.end()); + long double haf; + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + + if (s == 0 || n == 0) + haf = 1.0; + else + haf = loop_hafnian_rpt(matq, muq, rpt); + + return static_cast(haf); } -} +} // namespace libwalrus From 350083d16f7e9f0271695f59afb90fcdcde17b3e Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Sun, 12 Jan 2020 23:23:18 -0500 Subject: [PATCH 02/10] Fix compiler warnings --- include/repeated_hafnian.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 66e582fe2..daa5f3a3a 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -37,19 +37,19 @@ namespace libwalrus { * @return hafnian of the input matrix */ -std::vector lin_to_multi(int linear_index, const std::vector &maxes) { +std::vector lin_to_multi(unsigned long long int linear_index, const std::vector &maxes) { std::vector i(maxes.size(), 0); - int l = 0; + unsigned int l = 0; int s0 = maxes[0] + 1; i[l] = linear_index % s0; - int prod = s0; - int gl = linear_index; + unsigned long long int prod = s0; + unsigned long long int gl = linear_index; while (l < i.size() - 1) { if (prod > linear_index) { break; } else { - int glp1 = (gl - i[l]) / (maxes[l] + 1); + unsigned long long int glp1 = (gl - i[l]) / (maxes[l] + 1); i[l + 1] = glp1 % (maxes[l + 1] + 1); prod *= (maxes[1 + 1] + 1); gl = glp1; @@ -128,9 +128,9 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { int chunks = omp_get_num_threads(); int id = omp_get_thread_num(); - int chunk_size = steps / chunks; - int beg = id * chunk_size; - int end = (id == chunks - 1) ? steps : beg + chunk_size; + unsigned long long int chunk_size = steps / chunks; + unsigned long long int beg = id * chunk_size; + unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; T sum_chunk = 0.L; std::vector x = lin_to_multi(beg, rpt); @@ -222,9 +222,9 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, int chunks = omp_get_num_threads(); int id = omp_get_thread_num(); - int chunk_size = steps / chunks; - int beg = id * chunk_size; - int end = (id == chunks - 1) ? steps : beg + chunk_size; + unsigned long long int chunk_size = steps / chunks; + unsigned long long int beg = id * chunk_size; + unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; T sum_chunk = 0.L; std::vector x = lin_to_multi(beg, rpt); From 6fa8f446950e377369003ea4b1e715f86ff06885 Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Mon, 13 Jan 2020 13:50:08 -0500 Subject: [PATCH 03/10] Fix bug in lin_to_multi --- include/repeated_hafnian.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index daa5f3a3a..56bf4a013 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -46,14 +46,10 @@ std::vector lin_to_multi(unsigned long long int linear_index, const std::ve unsigned long long int gl = linear_index; while (l < i.size() - 1) { - if (prod > linear_index) { - break; - } else { - unsigned long long int glp1 = (gl - i[l]) / (maxes[l] + 1); - i[l + 1] = glp1 % (maxes[l + 1] + 1); - prod *= (maxes[1 + 1] + 1); - gl = glp1; - } + unsigned long long int glp1 = (gl - i[l]) / (maxes[l] + 1); + i[l + 1] = glp1 % (maxes[l + 1] + 1); + prod *= (maxes[1 + 1] + 1); + gl = glp1; l++; } return i; From a8cce49dd9d69e80ea43beba65a3b59c20bb81f1 Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Mon, 13 Jan 2020 14:40:55 -0500 Subject: [PATCH 04/10] update changelog --- .github/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 8353d4b49..0e7271906 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -10,6 +10,8 @@ * Updates and improves the speed and accuracy of `thewalrus.quantum.fock_tensor`. [#107](https://github.com/XanaduAI/thewalrus/pull/107) +* Add OpenMP support to the repeated moment hafnian code. [#120](https://github.com/XanaduAI/thewalrus/pull/120) + ### Bug fixes * Corrects typos in the random number generation in the C++ unit tests. [#118](https://github.com/XanaduAI/thewalrus/pull/118) From 9055570a87db1dd0d08d5720bae5bfb3bbd5641c Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Mon, 13 Jan 2020 15:57:26 -0500 Subject: [PATCH 05/10] Fix windows issue --- include/repeated_hafnian.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 56bf4a013..79b6b305c 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -22,6 +22,12 @@ #pragma once #include +#if not defined(_OPENMP) +typedef int omp_int_t; +inline omp_int_t omp_get_thread_num() { return 0;} +inline omp_int_t omp_get_num_threads() { return 1;} +#endif + namespace libwalrus { /** From 1deab6d1f5c8c5476a8833a09351207374d63f71 Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Mon, 13 Jan 2020 16:02:03 -0500 Subject: [PATCH 06/10] Revert clang-format doxygen changes --- include/repeated_hafnian.hpp | 78 +++++++++++++++--------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 79b6b305c..7b4990c15 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -88,16 +88,14 @@ template T get_binom_coeff(T N, T K) { * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return hafnian of the input matrix */ template @@ -185,23 +183,21 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { return y; } + /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of - * means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ template @@ -313,12 +309,11 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for - * Python integration. It accepts and returns complex double numeric types, and + * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for Python + * integration. It accepts and returns complex double numeric types, and * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices @@ -329,8 +324,7 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, * \f$n\times n\f$ row-ordered symmetric matrix. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return hafnian of the input matrix */ std::complex hafnian_rpt_quad(std::vector> &mat, @@ -353,13 +347,12 @@ std::complex hafnian_rpt_quad(std::vector> &mat, * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for - * Python integration. It accepts and returns double numeric types, and returns - * sensible values for empty and non-even matrices. + * This is a wrapper around the templated function `libwalrus::hafnian_rpt` for Python + * integration. It accepts and returns double numeric types, and + * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices * to type `long double`, allowing for greater precision than supported @@ -369,8 +362,7 @@ std::complex hafnian_rpt_quad(std::vector> &mat, * \f$n\times n\f$ row-ordered symmetric matrix. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return hafnian of the input matrix */ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { @@ -392,13 +384,12 @@ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` - * for Python integration. It accepts and returns complex double numeric types, - * and returns sensible values for empty and non-even matrices. + * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` for Python + * integration. It accepts and returns complex double numeric types, and + * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices * to type `complex`, allowing for greater precision than supported @@ -406,12 +397,10 @@ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of - * means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ std::complex @@ -437,12 +426,11 @@ loop_hafnian_rpt_quad(std::vector> &mat, * described in *From moments of sum to moments of product*, * [doi:10.1016/j.jmva.2007.01.013](https://dx.doi.org/10.1016/j.jmva.2007.01.013>). * - * Note that this algorithm, while generally slower than others, can be - * significantly more efficient in the cases where the matrix has repeated rows - * and columns. + * Note that this algorithm, while generally slower than others, can be significantly more + * efficient in the cases where the matrix has repeated rows and columns. * - * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` - * for Python integration. It accepts and returns double numeric types, and + * This is a wrapper around the templated function `libwalrus::hafnian_rpt_loop` for Python + * integration. It accepts and returns double numeric types, and * returns sensible values for empty and non-even matrices. * * In addition, this wrapper function automatically casts all matrices @@ -451,12 +439,10 @@ loop_hafnian_rpt_quad(std::vector> &mat, * * @param mat a flattened vector of size \f$n^2\f$, representing an * \f$n\times n\f$ row-ordered symmetric matrix. - * @param mu a vector of length \f$n\f$ representing the vector of - * means/displacement. + * @param mu a vector of length \f$n\f$ representing the vector of means/displacement. * @param rpt a vector of integers, representing the number of * times each row/column in `mat` is repeated. For example, - * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all - * ones. + * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ double loop_hafnian_rpt_quad(std::vector &mat, std::vector &mu, From d18c141ac598e3aafe6c0a789f02d290bad3630c Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Mon, 13 Jan 2020 16:35:24 -0500 Subject: [PATCH 07/10] Fix windows problem attempt #2 --- include/repeated_hafnian.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 7b4990c15..97f6e46d3 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -22,7 +22,7 @@ #pragma once #include -#if not defined(_OPENMP) +#if defined(_MSC_VER) typedef int omp_int_t; inline omp_int_t omp_get_thread_num() { return 0;} inline omp_int_t omp_get_num_threads() { return 1;} From 1fa6d7241558f7e41b8aea075c6d5641a6f41c36 Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Tue, 14 Jan 2020 11:59:24 -0500 Subject: [PATCH 08/10] Remove lines that are not needed anymore --- include/repeated_hafnian.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 97f6e46d3..e7e911f58 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -48,13 +48,11 @@ std::vector lin_to_multi(unsigned long long int linear_index, const std::ve unsigned int l = 0; int s0 = maxes[0] + 1; i[l] = linear_index % s0; - unsigned long long int prod = s0; unsigned long long int gl = linear_index; while (l < i.size() - 1) { unsigned long long int glp1 = (gl - i[l]) / (maxes[l] + 1); i[l + 1] = glp1 % (maxes[l + 1] + 1); - prod *= (maxes[1 + 1] + 1); gl = glp1; l++; } From d99d0f87a6e5e06080077b432c9f560ae746ca48 Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Thu, 16 Jan 2020 17:05:27 -0500 Subject: [PATCH 09/10] Add minor optimizations for one core --- include/repeated_hafnian.hpp | 69 ++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index e7e911f58..c88417ebb 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -42,8 +42,8 @@ namespace libwalrus { * * @return hafnian of the input matrix */ - -std::vector lin_to_multi(unsigned long long int linear_index, const std::vector &maxes) { +std::vector lin_to_multi(unsigned long long int linear_index, + const std::vector &maxes) { std::vector i(maxes.size(), 0); unsigned int l = 0; int s0 = maxes[0] + 1; @@ -68,7 +68,6 @@ std::vector lin_to_multi(unsigned long long int linear_index, const std::ve * * @return N!/K!(N-K)! */ - template T get_binom_coeff(T N, T K) { T r = 1; T d; @@ -98,7 +97,6 @@ template T get_binom_coeff(T N, T K) { */ template inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { - int n = std::sqrt(static_cast(mat.size())); assert(static_cast(rpt.size()) == n); T y = 0.0; @@ -131,20 +129,24 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; T sum_chunk = 0.L; - std::vector x = lin_to_multi(beg, rpt); + std::vector x = + (chunks == 1) ? std::vector(n, 0) : lin_to_multi(beg, rpt); std::vector nu2(n); - for (int i = 0; i < n; i++) - nu2[i] = 0.5 * rpt[i] - x[i]; + for (int i = 0; i < n; i++) nu2[i] = 0.5 * rpt[i]; - int x_sum = std::accumulate(x.begin(), x.end(), 0); - for (int i = 0; i < n; i++) { - p *= get_binom_coeff(rpt[i], x[i]); + if (chunks != 1) { + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); + } + p *= (std::accumulate(x.begin(), x.end(), 0) % 2 == 0) ? 1 : -1; } - p *= (x_sum % 2 == 0) ? 1 : -1; for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; + T nu2_min_x = (nu2[i] - static_cast(x[i])); + q += 0.5L * nu2_min_x * nu2_min_x * mat[i * n + i]; + for (int j = 0; j < i; j++) { + q += (nu2[j] - static_cast(x[j])) * nu2_min_x * + mat[i * n + j]; } } @@ -152,13 +154,12 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { sum_chunk += static_cast(p) * pow(q, s2); for (int j = 0; j < n; j++) { - if (x[j] < rpt[j]) { x[j] += 1; p *= -static_cast(rpt[j] + 1 - x[j]) / x[j]; for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (0.5L * rpt[k] - x[k]); + q -= mat[k * n + j] * (nu2[k] - static_cast(x[k])); } q -= 0.5L * mat[j * n + j]; break; @@ -168,7 +169,8 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { p *= -1; } for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (0.5L * rpt[k] - x[k]); + q += static_cast(rpt[j]) * mat[k * n + j] * + (nu2[k] - static_cast(x[k])); } q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; } @@ -181,7 +183,6 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { return y; } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -217,34 +218,39 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, T q = 0.0L, q1 = 0.0L; int chunks = omp_get_num_threads(); - int id = omp_get_thread_num(); + int id = (chunks == 1) ? 0 : omp_get_thread_num(); unsigned long long int chunk_size = steps / chunks; unsigned long long int beg = id * chunk_size; unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; + T sum_chunk = 0.L; - std::vector x = lin_to_multi(beg, rpt); + std::vector x = + (chunks == 1) ? std::vector(n, 0) : lin_to_multi(beg, rpt); int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s1 = std::floor(0.5 * s) + 1; std::vector z1(s1, 1.0L); std::vector z2(s1, 1.0L); - std::vector nu2(n); - for (int i = 0; i < n; i++) - nu2[i] = 0.5 * rpt[i] - x[i]; + std::vector nu2(n); + for (int i = 0; i < n; i++) nu2[i] = 0.5 * rpt[i]; for (int i = 0; i < n; i++) { - q1 += nu2[i] * mu[i]; - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; + T nu2_min_x = (nu2[i] - static_cast(x[i])); + q1 += nu2_min_x * mu[i]; + q += 0.5L * nu2_min_x * nu2_min_x * mat[i * n + i]; + for (int j = 0; j < i; j++) { + q += (nu2[j] - static_cast(x[j])) * nu2_min_x * + mat[i * n + j]; } } - int x_sum = std::accumulate(x.begin(), x.end(), 0); - for (int i = 0; i < n; i++) { - p *= get_binom_coeff(rpt[i], x[i]); + if (chunks != 1) { + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); + } + p *= (std::accumulate(x.begin(), x.end(), 0) % 2 == 0) ? 1 : -1; } - p *= (x_sum % 2 == 0) ? 1 : -1; for (unsigned long long int i = beg; i < end; i++) { for (int j = 1; j < s1; j++) { @@ -276,7 +282,7 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, x[j]); for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (0.5L * rpt[k] - x[k]); + q -= mat[k * n + j] * (nu2[k] - static_cast(x[k])); } q -= 0.5L * mat[j * n + j]; q1 -= mu[j]; @@ -287,7 +293,8 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, p *= -1; } for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (0.5L * rpt[k] - x[k]); + q += static_cast(rpt[j]) * mat[k * n + j] * + (nu2[k] - static_cast(x[k])); } q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; q1 += static_cast(rpt[j]) * mu[j]; From ba11956e535670f6f9e5c94b20364de490fa572d Mon Sep 17 00:00:00 2001 From: Trevor Vincent Date: Fri, 17 Jan 2020 15:32:12 -0500 Subject: [PATCH 10/10] Add josh suggestions --- include/repeated_hafnian.hpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index c88417ebb..d36176ce3 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -16,8 +16,8 @@ * \rst * Contains functions for computing the hafnian using the algorithm * described in *From moments of sum to moments of product*, - * `doi:10.1016/j.jmva.2007.01.013 - * `__. \endrst + * `doi:10.1016/j.jmva.2007.01.013 `__. + * \endrst */ #pragma once #include @@ -40,7 +40,7 @@ namespace libwalrus { * @param maxes a vector of integers, representing the max index value * of each indice in the multi-index object. * - * @return hafnian of the input matrix + * @return multi-index corresponding to the linear index */ std::vector lin_to_multi(unsigned long long int linear_index, const std::vector &maxes) { @@ -60,19 +60,19 @@ std::vector lin_to_multi(unsigned long long int linear_index, } /** - * Returns the binomial coefficient N!/K!(N-K)! + * Returns the binomial coefficient \f$N!/K!(N-K)!\f$ * Adapted from http://blog.plover.com/math/choose.html * * @param N * @param K * - * @return N!/K!(N-K)! + * @return \f$N!/K!(N-K)!\f$ */ -template T get_binom_coeff(T N, T K) { +template +T get_binom_coeff(T N, T K) { T r = 1; T d; - if (K > N) - return 0; + if (K > N) return 0; for (d = 1; d <= K; d++) { r *= N--; r /= d; @@ -106,7 +106,6 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { T q = 0.0L; long double p = 2; - // std::vector x(n, 0.0); int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s2 = s / 2; @@ -408,10 +407,9 @@ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ -std::complex -loop_hafnian_rpt_quad(std::vector> &mat, - std::vector> &mu, - std::vector &rpt) { +std::complex loop_hafnian_rpt_quad( + std::vector> &mat, + std::vector> &mu, std::vector &rpt) { std::vector> matq(mat.begin(), mat.end()); std::vector> muq(mu.begin(), mu.end()); std::complex haf;