From 3b0b9e5ab7605725a9e3e36714a5572244ac03fb Mon Sep 17 00:00:00 2001 From: TabOg <61025163+TabOg@users.noreply.github.com> Date: Tue, 4 Oct 2022 19:15:16 +0100 Subject: [PATCH 1/2] Optimizations to Frobenius automorphisms Optimize the function which maps only non zero elements to 1, and zero elements to 0, by implementing an alternative algorithm which takes O(logd) Frobenius automorphisms rather than O(d). Co-authored-by: Jack Crawford --- include/helib/EncryptedArray.h | 2 +- src/eqtesting.cpp | 59 ++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/include/helib/EncryptedArray.h b/include/helib/EncryptedArray.h index 9359feb19..d34b09ab4 100644 --- a/include/helib/EncryptedArray.h +++ b/include/helib/EncryptedArray.h @@ -2651,7 +2651,7 @@ inline void totalSums(Ctxt& ctxt) //! @brief Map all non-zero slots to 1, leaving zero slots as zero. //! Assumes that r=1, and that all the slots contain elements from GF(p^d). -void mapTo01(const EncryptedArray& ea, Ctxt& ctxt); +void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread = true); // Implemented in eqtesting.cpp. We compute // x^{p^d-1} = x^{(1+p+...+p^{d-1})*(p-1)} // by setting y=x^{p-1} and then outputting y * y^p * ... * y^{p^{d-1}}, diff --git a/src/eqtesting.cpp b/src/eqtesting.cpp index b310190d5..d11bb1786 100644 --- a/src/eqtesting.cpp +++ b/src/eqtesting.cpp @@ -9,6 +9,21 @@ * See the License for the specific language governing permissions and * limitations under the License. See accompanying LICENSE file. */ + +/* Copyright (C) 2022 Intel Corporation + * SPDX-License-Identifier: Apache-2.0 + * + * Modifying HElib to optimize the 01 map. + * Contributions include + * Modified: + * mapTo01 + * added parallelism to existing logic for norm calculation + * added alternative logic for norm calculation which uses log(d) + * automorphisms on a single core + * added an additional optional argument `multithread` which determines + * which version to run + * + */ /** * @file eqtesting.cpp * @brief Useful functions for equality testing... @@ -17,6 +32,7 @@ #include #include #include +#include #include @@ -29,10 +45,7 @@ namespace helib { // and then outputting y * y^p * ... * y^{p^{d-1}}, with exponentiation to // powers of p done via Frobenius. -// FIXME: the computation of the "norm" y * y^p * ... * y^{p^{d-1}} -// can be done using O(log d) automorphisms, rather than O(d). - -void mapTo01(const EncryptedArray& ea, Ctxt& ctxt) +void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread) { long p = ctxt.getPtxtSpace(); if (p != ea.getPAlgebra().getP()) // ptxt space is p^r for r>1 @@ -40,13 +53,39 @@ void mapTo01(const EncryptedArray& ea, Ctxt& ctxt) if (p > 2) ctxt.power(p - 1); // set y = x^{p-1} - long d = ea.getDegree(); - if (d > 1) { // compute the product of the d automorphisms - std::vector v(d, ctxt); - for (long i = 1; i < d; i++) - v[i].frobeniusAutomorph(i); - totalProduct(ctxt, v); + // TODO: investigate this trade off more thoroughly + // Computing in parallel over t threads has runtime approximately + // (d - 1)/t, whereas single thread has runtime approx log(d) + if ((NTL::AvailableThreads() > 1) && multithread) { + // Compute O(d) Frobenius automorphisms in parallel + if (d > 1) { + // compute the d - 1 automorphisms in parallel + std::vector v(d, ctxt); + NTL_EXEC_RANGE(d - 1, first, last) + for (long i = first; i < last; i++) + v[i + 1].frobeniusAutomorph(i + 1); + NTL_EXEC_RANGE_END + // and compute the product of the d automorphisms + totalProduct(ctxt, v); + } + } else { + // Compute of the "norm" y * y^p * ... * y^{p^{d-1}} + // using O(log d) automorphisms, rather than O(d). + long e = 1; + long b = NTL::NumBits(d); + Ctxt orig = ctxt; + for (long i = b - 2; i >= 0; i--) { + Ctxt tmp = ctxt; + tmp.frobeniusAutomorph(e); + ctxt *= tmp; + e *= 2; + if (NTL::bit(d, i)) { + ctxt.frobeniusAutomorph(1); + ctxt *= orig; + e++; + } + } } } From fc1dc72fe01991cf1ede770bcba0d8c57614c4c4 Mon Sep 17 00:00:00 2001 From: Jack Crawford Date: Fri, 11 Nov 2022 16:22:46 +0000 Subject: [PATCH 2/2] Multi-threaded reader for PSI iO --- misc/psi/io/io.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/misc/psi/io/io.h b/misc/psi/io/io.h index ee0059bf7..abfed3382 100644 --- a/misc/psi/io/io.h +++ b/misc/psi/io/io.h @@ -76,11 +76,14 @@ helib::Database readDbFromFile(const std::string& databaseFilePath, } } } else { // Ctxt query - for (long i = 0; i < nrow; ++i) { - for (long j = 0; j < ncol; ++j) { - reader.value().readDatum(data(i, j), i, j); - } + NTL_EXEC_RANGE(nrow * ncol, first, last) + Reader threadReader(reader.value()); + for (long i = first; i < last; ++i) { + long row = i / ncol; + long col = i % ncol; + threadReader.readDatum(data(row, col), row, col); } + NTL_EXEC_RANGE_END } return helib::Database(data, contextp); @@ -128,11 +131,14 @@ helib::Matrix readQueryFromFile(const std::string& queryFilePath, } } else { // Ctxt query // Read in ctxts - for (long i = 0; i < nrow; ++i) { - for (long j = 0; j < ncol; ++j) { - reader.value().readDatum(query(i, j), i, j); - } + NTL_EXEC_RANGE(nrow * ncol, first, last) + Reader threadReader(reader.value()); + for (long i = first; i < last; ++i) { + long row = i / ncol; + long col = i % ncol; + threadReader.readDatum(query(row, col), row, col); } + NTL_EXEC_RANGE_END if (ncol == 1) { // Transpose to make row vector query.transpose(); }