Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Faster random sampling #9655

Merged
merged 7 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,14 +691,8 @@ template <class T> field<T> field<T>::random_element(numeric::RNG* engine) noexc
constexpr field pow_2_256 = field(uint256_t(1) << 128).sqr();
field lo;
field hi;
*(uint256_t*)lo.data = engine->get_random_uint256();
*(uint256_t*)hi.data = engine->get_random_uint256();
lo.self_reduce_once();
lo.self_reduce_once();
lo.self_reduce_once();
hi.self_reduce_once();
hi.self_reduce_once();
hi.self_reduce_once();
lo = engine->get_random_uint256();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I restored the version that Zac checked in a previous PR. Accidentally got in an unchecked PR in there

hi = engine->get_random_uint256();
return lo + (pow_2_256 * hi);
}

Expand Down
94 changes: 81 additions & 13 deletions barretenberg/cpp/src/barretenberg/numeric/random/engine.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,79 @@
#include "engine.hpp"
#include "barretenberg/common/assert.hpp"
#include <array>
#include <cstring>
#include <functional>
#include <mutex>
#include <random>
#include <sys/random.h>

namespace bb::numeric {

namespace {
auto generate_random_data()

#ifndef __wasm__
// When working on native we allocate 1M of memory to sample randomness from urandom
constexpr size_t RANDOM_BUFFER_SIZE = 1UL << 20;
#else
// In wasm the API we are using can only give 256 bytes per call, so there is no point in creating a larger buffer
constexpr size_t RANDOM_BUFFER_SIZE = 256;
constexpr size_t BYTES_PER_GETENTROPY_READ = 256;
#endif
// Buffer with randomness sampled from a CSPRNG
uint8_t random_buffer[RANDOM_BUFFER_SIZE];
// Offset into the unused part of the buffer
ssize_t random_buffer_offset = -1;

#ifndef NO_MULTITHREADING
// We don't want races to happen
std::mutex random_buffer_mutex;
#endif

/**
* @brief Generate an array of random unsigned ints sampled from a CSPRNG
*
* @tparam size_in_unsigned_ints Size of the array
* @return std::array<unsigned int, size_in_unsigned_ints>
*/
template <size_t size_in_unsigned_ints> std::array<unsigned int, size_in_unsigned_ints> generate_random_data()
{
std::array<unsigned int, 32> random_data;
std::random_device source;
std::generate(std::begin(random_data), std::end(random_data), std::ref(source));
static_assert(size_in_unsigned_ints > 0);
static_assert(size_in_unsigned_ints <= 32);
std::array<unsigned int, size_in_unsigned_ints> random_data;
constexpr size_t random_data_buffer_size = sizeof(random_data);

#ifndef NO_MULTITHREADING
std::unique_lock<std::mutex> lock(random_buffer_mutex);
#endif

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like it'd overall work better if the random buffer and random buffer offset were thread_local, then we wouldn't need any locking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll switch. Hope we don't get a situation where we sample very few elements in new threads all the time

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with the thread pool, that wouldn't be the case

// if the buffer is not initialized or doesn't contain enough bytes, sample randomness
// We could preserve the leftover bytes, but it's a bit messy
if (random_buffer_offset == -1 ||
(static_cast<size_t>(random_buffer_offset) + random_data_buffer_size) > RANDOM_BUFFER_SIZE) {
size_t bytes_left = RANDOM_BUFFER_SIZE;
uint8_t* current_offset = random_buffer;
// Sample until we fill the buffer
while (bytes_left != 0) {
#ifndef __wasm__
// Sample from urandom on native
auto read_bytes = getrandom(current_offset, bytes_left, 0);
#else
// Sample through a "syscall" on wasm. We can't request more than 256, it fails and results in an infinite
// loop
ssize_t read_bytes =
getentropy(current_offset, BYTES_PER_GETENTROPY_READ) == -1 ? -1 : BYTES_PER_GETENTROPY_READ;
#endif
// If we read something, update the leftover
if (read_bytes != -1) {
current_offset += read_bytes;
bytes_left -= static_cast<size_t>(read_bytes);
}
}
random_buffer_offset = 0;
}

memcpy(&random_data, random_buffer + random_buffer_offset, random_data_buffer_size);
random_buffer_offset += random_data_buffer_size;
return random_data;
}
} // namespace
Expand All @@ -20,49 +82,55 @@ class RandomEngine : public RNG {
public:
uint8_t get_random_uint8() override
{
auto buf = generate_random_data();
auto buf = generate_random_data<1>();
uint32_t out = buf[0];
return static_cast<uint8_t>(out);
}

uint16_t get_random_uint16() override
{
auto buf = generate_random_data();
auto buf = generate_random_data<1>();
uint32_t out = buf[0];
return static_cast<uint16_t>(out);
}

uint32_t get_random_uint32() override
{
auto buf = generate_random_data();
auto buf = generate_random_data<1>();
uint32_t out = buf[0];
return static_cast<uint32_t>(out);
}

uint64_t get_random_uint64() override
{
auto buf = generate_random_data();
auto buf = generate_random_data<2>();
auto lo = static_cast<uint64_t>(buf[0]);
auto hi = static_cast<uint64_t>(buf[1]);
return (lo + (hi << 32ULL));
}

uint128_t get_random_uint128() override
{
auto big = get_random_uint256();
auto lo = static_cast<uint128_t>(big.data[0]);
auto hi = static_cast<uint128_t>(big.data[1]);
const auto get64 = [](const std::array<uint32_t, 4>& buffer, const size_t offset) {
auto lo = static_cast<uint64_t>(buffer[0 + offset]);
auto hi = static_cast<uint64_t>(buffer[1 + offset]);
return (lo + (hi << 32ULL));
};
auto buf = generate_random_data<4>();
auto lo = static_cast<uint128_t>(get64(buf, 0));
auto hi = static_cast<uint128_t>(get64(buf, 2));

return (lo + (hi << static_cast<uint128_t>(64ULL)));
}

uint256_t get_random_uint256() override
{
const auto get64 = [](const std::array<uint32_t, 32>& buffer, const size_t offset) {
const auto get64 = [](const std::array<uint32_t, 8>& buffer, const size_t offset) {
auto lo = static_cast<uint64_t>(buffer[0 + offset]);
auto hi = static_cast<uint64_t>(buffer[1 + offset]);
return (lo + (hi << 32ULL));
};
auto buf = generate_random_data();
auto buf = generate_random_data<8>();
uint64_t lolo = get64(buf, 0);
uint64_t lohi = get64(buf, 2);
uint64_t hilo = get64(buf, 4);
Expand Down
Loading