Skip to content

Commit

Permalink
Merge branch 'main' into feat/vlad/refactor-from-affine
Browse files Browse the repository at this point in the history
  • Loading branch information
vladfdp authored Jul 5, 2024
2 parents 6336e74 + 73cd4c0 commit fb707d5
Show file tree
Hide file tree
Showing 42 changed files with 794 additions and 345 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cpp_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ jobs:
build_args: -DEXT_FIELD=ON
- name: stark252
build_args: -DEXT_FIELD=OFF
- name: m31
build_args: -DEXT_FIELD=ON
steps:
- name: Checkout Repo
uses: actions/checkout@v4
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ jobs:
# We need to limit the number of threads to avoid running out of memory on weaker machines
# ignored tests are polynomial tests. Since they conflict with NTT tests, they are executed separately
run: |
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2 --ignored
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2 --ignored
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2
- name: Run baby bear tests
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
Expand All @@ -79,6 +79,13 @@ jobs:
cargo test --release --verbose -- --ignored
cargo test --release --verbose
- name: Run m31 tests
working-directory: ./wrappers/rust/icicle-fields/icicle-m31
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
run: |
cargo test --release --verbose -- --ignored
cargo test --release --verbose
# build-windows:
# name: Build on Windows
# runs-on: windows-2022
Expand Down
2 changes: 1 addition & 1 deletion icicle/cmake/FieldsCommon.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function(check_field)
set(SUPPORTED_FIELDS babybear;stark252)
set(SUPPORTED_FIELDS babybear;stark252;m31)

set(IS_FIELD_SUPPORTED FALSE)
set(I 1000)
Expand Down
127 changes: 66 additions & 61 deletions icicle/include/api/babybear.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,48 @@
extern "C" cudaError_t babybear_extension_ntt_cuda(
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);

extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const babybear::scalar_t* round_constants,
const babybear::scalar_t* internal_matrix_diag,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
extern "C" cudaError_t babybear_initialize_domain(
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
int width,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
extern "C" cudaError_t babybear_ntt_cuda(
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);

extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);

extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);

extern "C" cudaError_t babybear_scalar_convert_montgomery(
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_extension_mul_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_add_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_accumulate_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);

extern "C" cudaError_t babybear_extension_sub_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);

extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
const babybear::extension_t* input,
uint32_t row_size,
uint32_t column_size,
babybear::extension_t* output,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
bool on_device,
bool is_async);

extern "C" cudaError_t babybear_poseidon2_hash_cuda(
const babybear::scalar_t* input,
babybear::scalar_t* output,
int number_of_states,
int width,
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
poseidon2::Poseidon2Config& config);
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);

extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
device_context::DeviceContext& ctx);

extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);

extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_mul_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
Expand All @@ -72,45 +84,38 @@ extern "C" cudaError_t babybear_transpose_matrix_cuda(
bool is_async);

extern "C" cudaError_t babybear_bit_reverse_cuda(
const babybear::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
babybear::scalar_t* output);

extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);

extern "C" cudaError_t babybear_scalar_convert_montgomery(
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_initialize_domain(
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t babybear_ntt_cuda(
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);

extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);

extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);

extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t babybear_extension_mul_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const babybear::scalar_t* round_constants,
const babybear::scalar_t* internal_matrix_diag,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);

extern "C" cudaError_t babybear_extension_add_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
int width,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);

extern "C" cudaError_t babybear_extension_sub_cuda(
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
const babybear::scalar_t* input,
babybear::scalar_t* output,
int number_of_states,
int width,
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
poseidon2::Poseidon2Config& config);

extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
const babybear::extension_t* input,
uint32_t row_size,
uint32_t column_size,
babybear::extension_t* output,
device_context::DeviceContext& ctx,
bool on_device,
bool is_async);
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
device_context::DeviceContext& ctx);

#endif
96 changes: 47 additions & 49 deletions icicle/include/api/bls12_377.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@
#include "poseidon/poseidon.cuh"
#include "poseidon/tree/merkle.cuh"

extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);

extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);

extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);

extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);

extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
bls12_377::g2_affine_t* bases,
int msm_size,
Expand All @@ -34,20 +48,6 @@ extern "C" cudaError_t bls12_377_precompute_msm_bases_cuda(
extern "C" cudaError_t bls12_377_msm_cuda(
const bls12_377::scalar_t* scalars, const bls12_377::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_377::projective_t* out);

extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);

extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);

extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);

extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);

extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_ecntt_cuda(
const bls12_377::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::projective_t* output);

Expand All @@ -65,32 +65,18 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
int arity,
int full_rounds_half,
int partial_rounds,
const bls12_377::scalar_t* constants,
device_context::DeviceContext& ctx,
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);
extern "C" cudaError_t bls12_377_initialize_domain(
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);

extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);
extern "C" cudaError_t bls12_377_ntt_cuda(
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);

extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
bls12_377::scalar_t* input,
bls12_377::scalar_t* output,
int number_of_states,
int arity,
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
poseidon::PoseidonConfig& config);
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
const bls12_377::scalar_t* leaves,
bls12_377::scalar_t* digests,
uint32_t height,
int arity,
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
merkle::TreeBuilderConfig& config);
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);

extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);

extern "C" cudaError_t bls12_377_mul_cuda(
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
Expand All @@ -114,22 +100,34 @@ extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
bool is_async);

extern "C" cudaError_t bls12_377_bit_reverse_cuda(
const bls12_377::scalar_t* input,
uint64_t n,
vec_ops::BitReverseConfig& config,
bls12_377::scalar_t* output);
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);

extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);

extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
int arity,
int full_rounds_half,
int partial_rounds,
const bls12_377::scalar_t* constants,
device_context::DeviceContext& ctx,
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);

extern "C" cudaError_t bls12_377_initialize_domain(
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);

extern "C" cudaError_t bls12_377_ntt_cuda(
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
bls12_377::scalar_t* input,
bls12_377::scalar_t* output,
int number_of_states,
int arity,
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
poseidon::PoseidonConfig& config);

extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
const bls12_377::scalar_t* leaves,
bls12_377::scalar_t* digests,
uint32_t height,
int arity,
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
merkle::TreeBuilderConfig& config);

#endif
Loading

0 comments on commit fb707d5

Please sign in to comment.