Skip to content

Commit

Permalink
test from device memory too
Browse files Browse the repository at this point in the history
  • Loading branch information
yshekel committed Oct 8, 2024
1 parent 75849e0 commit 3dc380b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 24 deletions.
36 changes: 19 additions & 17 deletions icicle/backend/cpu/src/hash/cpu_keccak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,13 @@
* ---------------------------------------------------------------------- */

#include "icicle/backend/hash/keccak_backend.h"
#include "icicle/utils/modifiers.h"

namespace icicle {

// Configuration flags for Keccak and SHA-3
#define SHA3_USE_KECCAK_FLAG (0x80000000)
#define SHA3_CW(x) ((x) & (~SHA3_USE_KECCAK_FLAG))

#if defined(_MSC_VER)
#define SHA3_CONST(x) x
#else
#define SHA3_CONST(x) x##L
#endif

#ifndef SHA3_ROTL64
#define SHA3_ROTL64(x, y) (((x) << (y)) | ((x) >> ((sizeof(uint64_t) * 8) - (y))))
#endif
Expand All @@ -41,9 +35,13 @@ namespace icicle {
size); // if size==0 using default input chunk size. This is useful for Merkle-Tree constructions
ICICLE_LOG_DEBUG << name() << "::hash() called, batch=" << config.batch << ", input-size=" << size << " bytes";

// TODO (future): use tasks manager to parallel across threads. Add option to config-extension to set #threads
// with default=0. for now we don't do it and let the merkle-tree define the parallelizm so hashing a large batch
// outside a merkle-tree context is not as fast as it could be.
// Note that for batch=1 this has not effect.
for (unsigned batch_idx = 0; batch_idx < config.batch; ++batch_idx) {
eIcicleError err = sha3_hash_buffer(
8 * digest_size_in_bytes, m_is_keccak, input + batch_idx * single_input_size, single_input_size,
8 * digest_size_in_bytes /*=bitsize*/, m_is_keccak, input + batch_idx * single_input_size, single_input_size,
output + batch_idx * digest_size_in_bytes);

if (err != eIcicleError::SUCCESS) { return err; }
Expand Down Expand Up @@ -78,14 +76,18 @@ namespace icicle {
};

const uint64_t KeccakBackendCPU::s_keccakf_rndc[24] = {
SHA3_CONST(0x0000000000000001UL), SHA3_CONST(0x0000000000008082UL), SHA3_CONST(0x800000000000808aUL),
SHA3_CONST(0x8000000080008000UL), SHA3_CONST(0x000000000000808bUL), SHA3_CONST(0x0000000080000001UL),
SHA3_CONST(0x8000000080008081UL), SHA3_CONST(0x8000000000008009UL), SHA3_CONST(0x000000000000008aUL),
SHA3_CONST(0x0000000000000088UL), SHA3_CONST(0x0000000080008009UL), SHA3_CONST(0x000000008000000aUL),
SHA3_CONST(0x000000008000808bUL), SHA3_CONST(0x800000000000008bUL), SHA3_CONST(0x8000000000008089UL),
SHA3_CONST(0x8000000000008003UL), SHA3_CONST(0x8000000000008002UL), SHA3_CONST(0x8000000000000080UL),
SHA3_CONST(0x000000000000800aUL), SHA3_CONST(0x800000008000000aUL), SHA3_CONST(0x8000000080008081UL),
SHA3_CONST(0x8000000000008080UL), SHA3_CONST(0x0000000080000001UL), SHA3_CONST(0x8000000080008008UL)};
LONG_CONST_SUFFIX(0x0000000000000001UL), LONG_CONST_SUFFIX(0x0000000000008082UL),
LONG_CONST_SUFFIX(0x800000000000808aUL), LONG_CONST_SUFFIX(0x8000000080008000UL),
LONG_CONST_SUFFIX(0x000000000000808bUL), LONG_CONST_SUFFIX(0x0000000080000001UL),
LONG_CONST_SUFFIX(0x8000000080008081UL), LONG_CONST_SUFFIX(0x8000000000008009UL),
LONG_CONST_SUFFIX(0x000000000000008aUL), LONG_CONST_SUFFIX(0x0000000000000088UL),
LONG_CONST_SUFFIX(0x0000000080008009UL), LONG_CONST_SUFFIX(0x000000008000000aUL),
LONG_CONST_SUFFIX(0x000000008000808bUL), LONG_CONST_SUFFIX(0x800000000000008bUL),
LONG_CONST_SUFFIX(0x8000000000008089UL), LONG_CONST_SUFFIX(0x8000000000008003UL),
LONG_CONST_SUFFIX(0x8000000000008002UL), LONG_CONST_SUFFIX(0x8000000000000080UL),
LONG_CONST_SUFFIX(0x000000000000800aUL), LONG_CONST_SUFFIX(0x800000008000000aUL),
LONG_CONST_SUFFIX(0x8000000080008081UL), LONG_CONST_SUFFIX(0x8000000000008080UL),
LONG_CONST_SUFFIX(0x0000000080000001UL), LONG_CONST_SUFFIX(0x8000000080008008UL)};

const unsigned KeccakBackendCPU::s_keccakf_rotc[24] = {1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44};
Expand Down Expand Up @@ -236,7 +238,7 @@ namespace icicle {
}

ctx->u.s[ctx->wordIndex] ^= ctx->saved ^ t;
ctx->u.s[SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords) - 1] ^= SHA3_CONST(0x8000000000000000UL);
ctx->u.s[SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords) - 1] ^= LONG_CONST_SUFFIX(0x8000000000000000UL);
keccakf(ctx->u.s);

/* Return first bytes of the ctx->s. This conversion is not needed for
Expand Down
6 changes: 6 additions & 0 deletions icicle/include/icicle/utils/modifiers.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,9 @@
#define __host__
#define __device__
#endif

#if defined(_MSC_VER)
#define LONG_CONST_SUFFIX(x) x
#else
#define LONG_CONST_SUFFIX(x) x##L
#endif
32 changes: 25 additions & 7 deletions icicle/tests/test_hash_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,29 +132,46 @@ TEST_F(HashApiTest, Keccak256Batch)
TEST_F(HashApiTest, KeccakLarge)
{
auto config = default_hash_config();
config.batch = 1 << 10;
config.batch = 1 << 8;
const unsigned chunk_size = 1 << 13; // 8KB chunks
const unsigned total_size = chunk_size * config.batch;
auto input = std::make_unique<std::byte[]>(total_size);
randomize((uint64_t*)input.get(), total_size / sizeof(uint64_t));

const uint64_t output_size = 32;
auto output_main = std::make_unique<std::byte[]>(output_size * config.batch);
auto output_main_case_2 = std::make_unique<std::byte[]>(output_size * config.batch);
auto output_ref = std::make_unique<std::byte[]>(output_size * config.batch);

ICICLE_CHECK(icicle_set_device(s_main_target));
auto keccakCUDA = Keccak256::create();
START_TIMER(cuda_timer);
ICICLE_CHECK(keccakCUDA.hash(input.get(), chunk_size, config, output_main.get()));
END_TIMER(cuda_timer, "CUDA Keccak large time", true);

ICICLE_CHECK(icicle_set_device(s_reference_target));
auto keccakCPU = Keccak256::create();
START_TIMER(cpu_timer);
ICICLE_CHECK(keccakCPU.hash(input.get(), chunk_size, config, output_ref.get()));
END_TIMER(cpu_timer, "CPU Keccak large time", true);

ICICLE_CHECK(icicle_set_device(s_main_target));
auto keccakCUDA = Keccak256::create();

// test with host memory
START_TIMER(cuda_timer);
config.are_inputs_on_device = false;
config.are_outputs_on_device = false;
ICICLE_CHECK(keccakCUDA.hash(input.get(), chunk_size, config, output_main.get()));
END_TIMER(cuda_timer, "CUDA Keccak large time (on host memory)", true);
ASSERT_EQ(0, memcmp(output_main.get(), output_ref.get(), output_size * config.batch));

// test with device memory
std::byte *d_input = nullptr, *d_output = nullptr;
ICICLE_CHECK(icicle_malloc((void**)&d_input, total_size));
ICICLE_CHECK(icicle_malloc((void**)&d_output, output_size * config.batch));
ICICLE_CHECK(icicle_copy(d_input, input.get(), total_size));
config.are_inputs_on_device = true;
config.are_outputs_on_device = true;
START_TIMER(cuda_timer_device_mem);
ICICLE_CHECK(keccakCUDA.hash(d_input, chunk_size, config, d_output));
END_TIMER(cuda_timer_device_mem, "CUDA Keccak large time (on device memory)", true);
ICICLE_CHECK(icicle_copy(output_main_case_2.get(), d_output, output_size * config.batch));
ASSERT_EQ(0, memcmp(output_main_case_2.get(), output_ref.get(), output_size * config.batch));
}

TEST_F(HashApiTest, sha3)
Expand Down Expand Up @@ -230,6 +247,7 @@ class HashSumBackend : public HashBackend

TEST_F(HashApiTest, MerkleTree)
{
ICICLE_CHECK(icicle_set_device(s_reference_target)); // TODO CUDA too
// define input
constexpr int nof_leaves = 100;
uint32_t leaves[nof_leaves];
Expand Down

0 comments on commit 3dc380b

Please sign in to comment.