diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 17c9d6cd6..8eca95d39 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -10,10 +10,13 @@ * Updates and improves the speed and accuracy of `thewalrus.quantum.fock_tensor`. [#107](https://github.com/XanaduAI/thewalrus/pull/107) +* Add OpenMP support to the repeated moment hafnian code. [#120](https://github.com/XanaduAI/thewalrus/pull/120) + * Improves speed of the functions in `hermite_multidimensional.hpp`. [#123](https://github.com/XanaduAI/thewalrus/pull/123) * Improves speed of the functions in `thewalrus.fock_gradients` by doing calls to optimized functions in `hermite_multidimensional.hpp`. [#123](https://github.com/XanaduAI/thewalrus/pull/123) + ### Bug fixes * Corrects typos in the random number generation in the C++ unit tests. [#118](https://github.com/XanaduAI/thewalrus/pull/118) diff --git a/include/repeated_hafnian.hpp b/include/repeated_hafnian.hpp index 6fd960ac7..d36176ce3 100644 --- a/include/repeated_hafnian.hpp +++ b/include/repeated_hafnian.hpp @@ -22,8 +22,64 @@ #pragma once #include +#if defined(_MSC_VER) +typedef int omp_int_t; +inline omp_int_t omp_get_thread_num() { return 0;} +inline omp_int_t omp_get_num_threads() { return 1;} +#endif + namespace libwalrus { +/** + * Converts a linear index to a multi index + * e.g. if we wanted the multi-index (i,j) of an element in a 2x2 matrix given + * a linear index of 3 in the array storing the matrix, the maxes vector would + * be {1,1} and this function would return (1,0) + * + * @param linear_index the "flattened" index + * @param maxes a vector of integers, representing the max index value + * of each indice in the multi-index object. + * + * @return multi-index corresponding to the linear index + */ +std::vector lin_to_multi(unsigned long long int linear_index, + const std::vector &maxes) { + std::vector i(maxes.size(), 0); + unsigned int l = 0; + int s0 = maxes[0] + 1; + i[l] = linear_index % s0; + unsigned long long int gl = linear_index; + + while (l < i.size() - 1) { + unsigned long long int glp1 = (gl - i[l]) / (maxes[l] + 1); + i[l + 1] = glp1 % (maxes[l + 1] + 1); + gl = glp1; + l++; + } + return i; +} + +/** + * Returns the binomial coefficient \f$N!/K!(N-K)!\f$ + * Adapted from http://blog.plover.com/math/choose.html + * + * @param N + * @param K + * + * @return \f$N!/K!(N-K)!\f$ + */ +template +T get_binom_coeff(T N, T K) { + T r = 1; + T d; + if (K > N) return 0; + for (d = 1; d <= K; d++) { + r *= N--; + r /= d; + } + return r; +} + /** * Returns the hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -41,70 +97,91 @@ namespace libwalrus { */ template inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { - int n = std::sqrt(static_cast(mat.size())); - assert(static_cast(rpt.size()) == n); + int n = std::sqrt(static_cast(mat.size())); + assert(static_cast(rpt.size()) == n); + T y = 0.0; - long double p = 2; - T y = 0.0, q = 0.0; +#pragma omp parallel + { + T q = 0.0L; - std::vector x(n, 0.0); + long double p = 2; int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s2 = s / 2; for (int i = 1; i < s2; i++) { - p /= i + 1; - } - - std::vector nu2(n); - std::transform(rpt.begin(), rpt.end(), nu2.begin(), - std::bind(std::multiplies(), std::placeholders::_1, 0.5L)); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; - } + p /= i + 1; } unsigned long long int steps = 1; for (auto i : rpt) { - steps *= i + 1; + steps *= i + 1; } steps /= 2; - for (unsigned long long int i = 0; i < steps; i++) { - y += static_cast(p) * pow(q, s2); - - for (int j = 0; j < n; j++) { - - if (x[j] < rpt[j]) { - x[j] += 1; - p *= -static_cast(rpt[j] + 1 - x[j]) / x[j]; - - for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * mat[j * n + j]; - break; - } - else { - x[j] = 0; - if (rpt[j] % 2 == 1) { - p *= -1; - } - for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; - } + int chunks = omp_get_num_threads(); + int id = omp_get_thread_num(); + unsigned long long int chunk_size = steps / chunks; + unsigned long long int beg = id * chunk_size; + unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; + T sum_chunk = 0.L; + + std::vector x = + (chunks == 1) ? std::vector(n, 0) : lin_to_multi(beg, rpt); + std::vector nu2(n); + for (int i = 0; i < n; i++) nu2[i] = 0.5 * rpt[i]; + + if (chunks != 1) { + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); + } + p *= (std::accumulate(x.begin(), x.end(), 0) % 2 == 0) ? 1 : -1; + } + + for (int i = 0; i < n; i++) { + T nu2_min_x = (nu2[i] - static_cast(x[i])); + q += 0.5L * nu2_min_x * nu2_min_x * mat[i * n + i]; + for (int j = 0; j < i; j++) { + q += (nu2[j] - static_cast(x[j])) * nu2_min_x * + mat[i * n + j]; + } + } + + for (unsigned long long int i = beg; i < end; i++) { + sum_chunk += static_cast(p) * pow(q, s2); + + for (int j = 0; j < n; j++) { + if (x[j] < rpt[j]) { + x[j] += 1; + p *= -static_cast(rpt[j] + 1 - x[j]) / x[j]; + + for (int k = 0; k < n; k++) { + q -= mat[k * n + j] * (nu2[k] - static_cast(x[k])); + } + q -= 0.5L * mat[j * n + j]; + break; + } else { + x[j] = 0; + if (rpt[j] % 2 == 1) { + p *= -1; + } + for (int k = 0; k < n; k++) { + q += static_cast(rpt[j]) * mat[k * n + j] * + (nu2[k] - static_cast(x[k])); + } + q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; } + } } +#pragma omp critical + y += sum_chunk; + } - return y; + return y; } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -122,97 +199,113 @@ inline T hafnian_rpt(std::vector &mat, std::vector &rpt) { * @return loop hafnian of the input matrix */ template -inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, std::vector &rpt) { - int n = std::sqrt(static_cast(mat.size())); - assert(static_cast(rpt.size()) == n); - +inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, + std::vector &rpt) { + int n = std::sqrt(static_cast(mat.size())); + assert(static_cast(rpt.size()) == n); + + T y = 0.0L; + unsigned long long int steps = 1; + for (auto i : rpt) { + steps *= i + 1; + } + steps /= 2; + +#pragma omp parallel + { long long int p = 2; - T y = 0.0L, q = 0.0L, q1 = 0.0L; + T q = 0.0L, q1 = 0.0L; - std::vector x(n, 0.0); + int chunks = omp_get_num_threads(); + int id = (chunks == 1) ? 0 : omp_get_thread_num(); + unsigned long long int chunk_size = steps / chunks; + unsigned long long int beg = id * chunk_size; + unsigned long long int end = (id == chunks - 1) ? steps : beg + chunk_size; + + T sum_chunk = 0.L; + + std::vector x = + (chunks == 1) ? std::vector(n, 0) : lin_to_multi(beg, rpt); int s = std::accumulate(rpt.begin(), rpt.end(), 0); int s1 = std::floor(0.5 * s) + 1; std::vector z1(s1, 1.0L); std::vector z2(s1, 1.0L); - // diagonal of matrix mat - // std::vector mu(n); - // for (int i=0; i nu2(n); - std::transform(rpt.begin(), rpt.end(), nu2.begin(), - std::bind(std::multiplies(), std::placeholders::_1, 0.5L)); + std::vector nu2(n); + for (int i = 0; i < n; i++) nu2[i] = 0.5 * rpt[i]; for (int i = 0; i < n; i++) { - q1 += nu2[i] * mu[i]; - for (int j = 0; j < n; j++) { - q += 0.5L * nu2[j] * mat[i * n + j] * nu2[i]; - } + T nu2_min_x = (nu2[i] - static_cast(x[i])); + q1 += nu2_min_x * mu[i]; + q += 0.5L * nu2_min_x * nu2_min_x * mat[i * n + i]; + for (int j = 0; j < i; j++) { + q += (nu2[j] - static_cast(x[j])) * nu2_min_x * + mat[i * n + j]; + } } - unsigned long long int steps = 1; - - for (auto i : rpt) { - steps *= i + 1; + if (chunks != 1) { + for (int i = 0; i < n; i++) { + p *= get_binom_coeff(rpt[i], x[i]); + } + p *= (std::accumulate(x.begin(), x.end(), 0) % 2 == 0) ? 1 : -1; } - steps /= 2; + for (unsigned long long int i = beg; i < end; i++) { + for (int j = 1; j < s1; j++) { + z1[j] = z1[j - 1] * q / (1.0L * j); + } - for (unsigned long long int i = 0; i < steps; i++) { + if (s % 2 == 1) { + z2[0] = q1; for (int j = 1; j < s1; j++) { - z1[j] = z1[j - 1] * q / (1.0L * j); - } - - if (s % 2 == 1) { - z2[0] = q1; - for (int j = 1; j < s1; j++) { - z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j + 1); - } - } - else { - for (int j = 1; j < s1; j++) { - z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j - 1); - } + z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j + 1); } - - - T z1z2prod = 0.0; - for (int j = 0; j < s1; j++) { - z1z2prod += z1[j] * z2[s1 - 1 - j]; + } else { + for (int j = 1; j < s1; j++) { + z2[j] = z2[j - 1] * pow(q1, 2) / (2.0L * j) / (2.0L * j - 1); } - - y += static_cast(p) * z1z2prod; - - for (int j = 0; j < n; j++) { - - if (x[j] < rpt[j]) { - x[j] += 1; - p = -std::round(p * static_cast(rpt[j] + 1 - x[j]) / x[j]); - - for (int k = 0; k < n; k++) { - q -= mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * mat[j * n + j]; - q1 -= mu[j]; - break; - } - else { - x[j] = 0; - if (rpt[j] % 2 == 1) { - p *= -1; - } - for (int k = 0; k < n; k++) { - q += (1.0L * rpt[j]) * mat[k * n + j] * (nu2[k] - x[k]); - } - q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; - q1 += static_cast(rpt[j]) * mu[j]; - } + } + + T z1z2prod = 0.0; + for (int j = 0; j < s1; j++) { + z1z2prod += z1[j] * z2[s1 - 1 - j]; + } + + sum_chunk += static_cast(p) * z1z2prod; + + for (int j = 0; j < n; j++) { + if (x[j] < rpt[j]) { + x[j] += 1; + p = -std::round(p * static_cast(rpt[j] + 1 - x[j]) / + x[j]); + + for (int k = 0; k < n; k++) { + q -= mat[k * n + j] * (nu2[k] - static_cast(x[k])); + } + q -= 0.5L * mat[j * n + j]; + q1 -= mu[j]; + break; + } else { + x[j] = 0; + if (rpt[j] % 2 == 1) { + p *= -1; + } + for (int k = 0; k < n; k++) { + q += static_cast(rpt[j]) * mat[k * n + j] * + (nu2[k] - static_cast(x[k])); + } + q -= 0.5L * rpt[j] * rpt[j] * mat[j * n + j]; + q1 += static_cast(rpt[j]) * mu[j]; } + } } - return y; +#pragma omp critical + y += sum_chunk; + } + + return y; } /** @@ -238,21 +331,21 @@ inline T loop_hafnian_rpt(std::vector &mat, std::vector &mu, std::vector hafnian_rpt_quad(std::vector> &mat, std::vector &rpt) { - std::vector> matq(mat.begin(), mat.end()); - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - std::complex haf; - - if ( s == 0 || n == 0) - haf = std::complex(1.0, 0.0); - else - haf = hafnian_rpt(matq, rpt); - - return static_cast>(haf); +std::complex hafnian_rpt_quad(std::vector> &mat, + std::vector &rpt) { + std::vector> matq(mat.begin(), mat.end()); + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + std::complex haf; + + if (s == 0 || n == 0) + haf = std::complex(1.0, 0.0); + else + haf = hafnian_rpt(matq, rpt); + + return static_cast>(haf); } - /** * Returns the hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -277,20 +370,19 @@ std::complex hafnian_rpt_quad(std::vector> &mat, st * @return hafnian of the input matrix */ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { - std::vector matq(mat.begin(), mat.end()); - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - long double haf; + std::vector matq(mat.begin(), mat.end()); + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + long double haf; - if ( s == 0 || n == 0) - haf = 1.0; - else - haf = hafnian_rpt(matq, rpt); + if (s == 0 || n == 0) + haf = 1.0; + else + haf = hafnian_rpt(matq, rpt); - return static_cast(haf); + return static_cast(haf); } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -315,22 +407,23 @@ double hafnian_rpt_quad(std::vector &mat, std::vector &rpt) { * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ -std::complex loop_hafnian_rpt_quad(std::vector> &mat, std::vector> &mu, std::vector &rpt) { - std::vector> matq(mat.begin(), mat.end()); - std::vector> muq(mu.begin(), mu.end()); - std::complex haf; - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - - if ( s == 0 || n == 0) - haf = std::complex(1.0, 0.0); - else - haf = loop_hafnian_rpt(matq, muq, rpt); - - return static_cast>(haf); +std::complex loop_hafnian_rpt_quad( + std::vector> &mat, + std::vector> &mu, std::vector &rpt) { + std::vector> matq(mat.begin(), mat.end()); + std::vector> muq(mu.begin(), mu.end()); + std::complex haf; + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + + if (s == 0 || n == 0) + haf = std::complex(1.0, 0.0); + else + haf = loop_hafnian_rpt(matq, muq, rpt); + + return static_cast>(haf); } - /** * Returns the loop hafnian of a matrix using the algorithm * described in *From moments of sum to moments of product*, @@ -355,19 +448,20 @@ std::complex loop_hafnian_rpt_quad(std::vector> &ma * `mat = [1]` and `rpt = [6]` represents a \f$6\times 6\f$ matrix of all ones. * @return loop hafnian of the input matrix */ -double loop_hafnian_rpt_quad(std::vector &mat, std::vector &mu, std::vector &rpt) { - std::vector matq(mat.begin(), mat.end()); - std::vector muq(mu.begin(), mu.end()); - long double haf; - int s = std::accumulate(rpt.begin(), rpt.end(), 0); - int n = std::sqrt(static_cast(mat.size())); - - if ( s == 0 || n == 0) - haf = 1.0; - else - haf = loop_hafnian_rpt(matq, muq, rpt); - - return static_cast(haf); +double loop_hafnian_rpt_quad(std::vector &mat, std::vector &mu, + std::vector &rpt) { + std::vector matq(mat.begin(), mat.end()); + std::vector muq(mu.begin(), mu.end()); + long double haf; + int s = std::accumulate(rpt.begin(), rpt.end(), 0); + int n = std::sqrt(static_cast(mat.size())); + + if (s == 0 || n == 0) + haf = 1.0; + else + haf = loop_hafnian_rpt(matq, muq, rpt); + + return static_cast(haf); } -} +} // namespace libwalrus