From db84c25b43de5925384677d820af96d44ef9323c Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Mon, 5 Aug 2024 16:37:54 +0400 Subject: [PATCH 1/2] feat: Make boojum compile with modern compiler --- rust-toolchain.toml | 2 +- src/cs/implementations/fast_serialization.rs | 4 ++-- src/lib.rs | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a671fa6..bc5d1d6 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2024-05-07" +channel = "nightly-2024-08-01" diff --git a/src/cs/implementations/fast_serialization.rs b/src/cs/implementations/fast_serialization.rs index ea7301c..7288270 100644 --- a/src/cs/implementations/fast_serialization.rs +++ b/src/cs/implementations/fast_serialization.rs @@ -272,7 +272,7 @@ where { fn write_into_buffer(&self, mut dst: W) -> Result<(), Box> { // we avoid transmute here - let flattened_self = self[..].flatten(); + let flattened_self = self[..].as_flattened(); let len_as_base = flattened_self.len(); let len_le_bytes = (len_as_base as u64).to_le_bytes(); @@ -346,7 +346,7 @@ where { fn write_into_buffer(&self, mut dst: W) -> Result<(), Box> { // we avoid transmute here - let flattened_self = self[..].flatten(); + let flattened_self = self[..].as_flattened(); let len_as_base = flattened_self.len(); let len_le_bytes = (len_as_base as u64).to_le_bytes(); diff --git a/src/lib.rs b/src/lib.rs index 8eb9e9f..6978fa7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,7 +53,6 @@ #![feature(vec_push_within_capacity)] #![feature(return_position_impl_trait_in_trait)] #![feature(type_changing_struct_update)] -#![feature(slice_flatten)] #![cfg_attr(feature = "include_packed_simd", feature(stdsimd))] pub mod algebraic_props; From f88506c4dc048522364becff2651f7f7a6f77ecd Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Mon, 5 Aug 2024 17:14:27 +0400 Subject: [PATCH 2/2] Remove old optimizations that rely on packed_simd --- .github/workflows/ci.yaml | 19 - Cargo.toml | 5 - src/field/goldilocks/arm_asm_packed_impl.rs | 858 ------------------ src/field/goldilocks/mod.rs | 15 - src/implementations/poseidon2/mod.rs | 47 +- .../poseidon2/state_generic_impl.rs | 4 - .../poseidon2/state_vectorized_double.rs | 415 --------- src/lib.rs | 2 +- 8 files changed, 7 insertions(+), 1358 deletions(-) delete mode 100644 src/field/goldilocks/arm_asm_packed_impl.rs delete mode 100644 src/implementations/poseidon2/state_vectorized_double.rs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f5200a1..a984dd9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,25 +17,6 @@ jobs: - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: cargo build --verbose - run: cargo test --verbose --all - - build_old: - name: cargo build and test (packed_simd) - strategy: - matrix: - # Needs big runners to run tests - # Only macos-13-xlarge is Apple Silicon, as per: - # https://docs.github.com/en/actions/using-github-hosted-runners/about-larger-runners/about-larger-runners#about-macos-larger-runners - os: [ubuntu-22.04-github-hosted-16core, macos-13-xlarge] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: nightly-2023-05-31 - - # Still compile the old rust nightly with packed simd - until we have a good replacement in poseidon. - - run: RUSTFLAGS=-Awarnings cargo +nightly-2023-05-31 build --features include_packed_simd - - run: RUSTFLAGS=-Awarnings cargo +nightly-2023-05-31 test --features include_packed_simd formatting: name: cargo fmt diff --git a/Cargo.toml b/Cargo.toml index a628cce..fbf9674 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,6 @@ itertools = "0.10" blake2 = "0.10" sha2 = "0.10" num-modular = "0.5.1" -packed_simd = { version = "0.3.9" , optional = true} pairing = { package = "pairing_ce", version = "=0.28.6" } crypto-bigint = "0.5" convert_case = "0.6" @@ -61,9 +60,5 @@ opt-level = 3 [features] # If enabled, logs will be using trace, if disabled, they will be printed to stdout. log_tracing = ["tracing"] -# Currently packed_simd is no longer working with the newest nightly. -# But we still keep it as a feature, as we didn't migrate all the code, and -# some people might want to use older rust nightly, to be able to gain some performance. -include_packed_simd = ["packed_simd"] cr_paranoia_mode = [] debug_track = [] diff --git a/src/field/goldilocks/arm_asm_packed_impl.rs b/src/field/goldilocks/arm_asm_packed_impl.rs deleted file mode 100644 index 03399c4..0000000 --- a/src/field/goldilocks/arm_asm_packed_impl.rs +++ /dev/null @@ -1,858 +0,0 @@ -use crate::cs::implementations::utils::precompute_twiddles_for_fft; -use crate::cs::traits::GoodAllocator; -use crate::field::{Field, PrimeField}; -use crate::worker::Worker; -use packed_simd::shuffle; -use std::ops::{Add, BitOr, Sub}; -use std::usize; - -use super::GoldilocksField; - -// we need max of an alignment of u64x4 and u64x8 in this implementation, so 64 - -#[derive(PartialEq, Eq, Hash, Clone, Copy)] -#[repr(C, align(64))] -pub struct MixedGL(pub [GoldilocksField; 16]); - -// we also need holder for SIMD targets, because u64x4 has smaller alignment than u64x8 -#[derive(Clone, Copy)] -#[repr(C, align(64))] -struct U64x4Holder([packed_simd::u64x4; 4]); - -impl std::fmt::Debug for MixedGL { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} - -impl std::fmt::Display for MixedGL { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} - -impl MixedGL { - pub const ORDER_BITS: usize = GoldilocksField::ORDER_BITS; - pub const ORDER: u64 = GoldilocksField::ORDER; - pub const TWO_ADICITY: usize = GoldilocksField::TWO_ADICITY; - pub const T: u64 = (Self::ORDER - 1) >> Self::TWO_ADICITY; - pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000 - pub const EPSILON: u64 = (1 << 32) - 1; - pub const EPSILON_VECTOR: packed_simd::u64x4 = packed_simd::u64x4::splat(Self::EPSILON); - pub const EPSILON_VECTOR_D: packed_simd::u64x8 = packed_simd::u64x8::splat(Self::EPSILON); - - #[inline(always)] - pub fn new() -> Self { - Self([GoldilocksField::ZERO; 16]) - } - - #[inline(always)] - pub fn from_constant(value: GoldilocksField) -> Self { - Self([value; 16]) - } - - #[inline(always)] - pub fn from_array(value: [GoldilocksField; 16]) -> Self { - Self(value) - } - - #[inline(always)] - #[unroll::unroll_for_loops] - pub fn to_reduced(&mut self) -> &mut Self { - let mut a_u64 = Self::as_u64x4_arrays(self); - - for i in 0..4 { - let a = a_u64.0[i]; - let a_reduced = a.add(Self::EPSILON_VECTOR); - let cmp = a_reduced.lt(Self::EPSILON_VECTOR); - let res = cmp.select(a_reduced, a); - - a_u64.0[i] = res; - } - - unsafe { - *self = Self::from_u64x4_arrays(a_u64); - } - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - pub fn mul_constant_assign(&'_ mut self, other: &GoldilocksField) -> &mut Self { - for i in 0..16 { - self.0[i].mul_assign(other); - } - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn mul_assign_impl(&mut self, other: &Self) -> &mut Self { - for i in 0..16 { - self.0[i].mul_assign(&other.0[i]); - } - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn add_assign_impl(&mut self, other: &Self) -> &mut Self { - let mut a_u64 = Self::as_u64x4_arrays(self); - let b_u64 = Self::as_u64x4_arrays(other); - - for i in 0..4 { - let a = a_u64.0[i]; - let b = b_u64.0[i]; - //additional reduction over b - let b_reduced = b.add(Self::EPSILON_VECTOR); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR); - let b = cmp.select(b_reduced, b); - //a+b - let sum = a.add(b); - let sum_reduced = sum.add(Self::EPSILON_VECTOR); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(a); - let reduce_flag = cmp0.bitor(cmp1); - let res = reduce_flag.select(sum_reduced, sum); - - a_u64.0[i] = res; - } - - unsafe { - *self = Self::from_u64x4_arrays(a_u64); - } - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn sub_assign_impl(&'_ mut self, other: &Self) -> &mut Self { - let mut a_u64 = Self::as_u64x4_arrays(self); - let b_u64 = Self::as_u64x4_arrays(other); - - for i in 0..4 { - let a = a_u64.0[i]; - let b = b_u64.0[i]; - //additional reduction over b - let b_reduced = b.add(Self::EPSILON_VECTOR); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR); - let b = cmp.select(b_reduced, b); - //a-b - let diff = a.sub(b); - let diff_reduced = diff.sub(Self::EPSILON_VECTOR); - let cmp = a.lt(b); - let res = cmp.select(diff_reduced, diff); - - a_u64.0[i] = res; - } - - unsafe { - *self = Self::from_u64x4_arrays(a_u64); - } - - self - } - - pub unsafe fn butterfly_1x1_impl(&mut self) -> &mut Self { - let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 2, 4, 6, 8, 10, 12, 14]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [1, 3, 5, 7, 9, 11, 13, 15]); - //additional reduction over v - let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); - let v = cmp.select(v_reduced, v); - // u + v - let sum = u.add(v); - let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); - let reduce_flag = cmp0.bitor(cmp1); - let res1 = reduce_flag.select(sum_reduced, sum); - // u - v - let diff = u.sub(v); - let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); - let res2 = cmp.select(diff_reduced, diff); - - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 8, 1, 9, 2, 10, 3, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 12, 5, 13, 6, 14, 7, 15]); - - *self = MixedGL::from_u64x8_arrays([part1, part2]); - - self - } - - pub unsafe fn butterfly_2x2_impl(&mut self) -> &mut Self { - let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 4, 5, 8, 9, 12, 13]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [2, 3, 6, 7, 10, 11, 14, 15]); - //additional reduction over v - let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); - let v = cmp.select(v_reduced, v); - // u + v - let sum = u.add(v); - let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); - let reduce_flag = cmp0.bitor(cmp1); - let res1 = reduce_flag.select(sum_reduced, sum); - // u - v - let diff = u.sub(v); - let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); - let res2 = cmp.select(diff_reduced, diff); - - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 8, 9, 2, 3, 10, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 12, 13, 6, 7, 14, 15]); - - *self = MixedGL::from_u64x8_arrays([part1, part2]); - - self - } - - pub unsafe fn butterfly_4x4_impl(&mut self) -> &mut Self { - let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 2, 3, 8, 9, 10, 11]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [4, 5, 6, 7, 12, 13, 14, 15]); - //additional reduction over v - let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); - let v = cmp.select(v_reduced, v); - // u + v - let sum = u.add(v); - let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); - let reduce_flag = cmp0.bitor(cmp1); - let res1 = reduce_flag.select(sum_reduced, sum); - // u - v - let diff = u.sub(v); - let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); - let res2 = cmp.select(diff_reduced, diff); - - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 2, 3, 8, 9, 10, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 6, 7, 12, 13, 14, 15]); - - *self = MixedGL::from_u64x8_arrays([part1, part2]); - - self - } - - /// # Safety - /// - /// Pointers must be properly aligned for `MixedGL` type, should point to arrays of length 8, and should point - /// to memory that can be mutated. - /// No references to the same memory should exist when this function is called. - /// Pointers should be different. - pub unsafe fn butterfly_8x8_impl(this: *const u64, other: *const u64) { - debug_assert!(this.addr() % std::mem::align_of::() == 0); - debug_assert!(other.addr() % std::mem::align_of::() == 0); - - let u = std::slice::from_raw_parts_mut(this as *mut u64, 8); - let v = std::slice::from_raw_parts_mut(other as *mut u64, 8); - let a = packed_simd::u64x8::from_slice_aligned(u); - let b = packed_simd::u64x8::from_slice_aligned(v); - //additional reduction over b - let b_reduced = b.add(Self::EPSILON_VECTOR_D); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR_D); - let b = cmp.select(b_reduced, b); - // u + v - let sum = a.add(b); - let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(a); - let reduce_flag = cmp0.bitor(cmp1); - let res1 = reduce_flag.select(sum_reduced, sum); - // u - v - let diff = a.sub(b); - let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = a.lt(b); - let res2 = cmp.select(diff_reduced, diff); - - res1.write_to_slice_aligned(u); - res2.write_to_slice_aligned(v); - } - - /// # Safety - /// - /// Pointers must be properly aligned for `MixedGL` type, should point to arrays of length 16, and should point - /// to memory that can be mutated. - /// No references to the same memory should exist when this function is called. - /// Pointers should be different. - pub unsafe fn butterfly_16x16_impl(mut this: *mut u64, mut other: *mut u64) { - debug_assert!(this.addr() % std::mem::align_of::() == 0); - debug_assert!(other.addr() % std::mem::align_of::() == 0); - - Self::butterfly_8x8_impl(this, other); - this = this.offset(8); - other = other.offset(8); - Self::butterfly_8x8_impl(this, other); - } - - // pub unsafe fn butterfly_16x16_impl( - // this: &mut Self, - // other: &mut Self, - // ) { - // let mut this_ptr = this.0.as_ptr() as *mut u64; - // let mut other_ptr = other.0.as_ptr() as *mut u64; - - // debug_assert!(this_ptr.addr() % std::mem::align_of::() == 0); - // debug_assert!(other_ptr.addr() % std::mem::align_of::() == 0); - - // Self::butterfly_8x8_impl(this_ptr, other_ptr); - // this_ptr = this_ptr.offset(8); - // other_ptr = other_ptr.offset(8); - // Self::butterfly_8x8_impl(this_ptr, other_ptr); - // } - - #[inline(always)] - pub fn from_field_array(input: [GoldilocksField; 16]) -> Self { - Self(input) - } - - #[inline(always)] - fn as_u64x4_arrays(input: &Self) -> U64x4Holder { - // this preserves an alignment - unsafe { std::mem::transmute(*input) } - } - - #[inline(always)] - pub(crate) fn as_u64x8_arrays(input: &Self) -> [packed_simd::u64x8; 2] { - // this preserves an alignment - unsafe { std::mem::transmute(*input) } - } - - #[inline(always)] - unsafe fn from_u64x4_arrays(input: U64x4Holder) -> Self { - // this preserves an alignment - std::mem::transmute(input) - } - - #[inline(always)] - pub(crate) unsafe fn from_u64x8_arrays(input: [packed_simd::u64x8; 2]) -> Self { - // this preserves an alignment - std::mem::transmute(input) - } - - #[inline(always)] - pub fn vec_add_assign(a: &mut [Self], b: &[Self]) { - use crate::field::traits::field_like::PrimeFieldLike; - for (a, b) in a.iter_mut().zip(b.iter()) { - a.add_assign(b, &mut ()); - } - } - - #[inline(always)] - pub fn vec_mul_assign(a: &mut [Self], b: &[Self]) { - use crate::field::traits::field_like::PrimeFieldLike; - for (a, b) in a.iter_mut().zip(b.iter()) { - a.mul_assign(b, &mut ()); - } - } -} - -impl Default for MixedGL { - fn default() -> Self { - Self([GoldilocksField::ZERO; 16]) - } -} - -impl crate::field::traits::field_like::PrimeFieldLike for MixedGL { - type Base = GoldilocksField; - type Context = (); - - #[inline(always)] - fn zero(_ctx: &mut Self::Context) -> Self { - Self([GoldilocksField::ZERO; 16]) - } - #[inline(always)] - fn one(_ctx: &mut Self::Context) -> Self { - Self([GoldilocksField::ONE; 16]) - } - #[inline(always)] - fn minus_one(_ctx: &mut Self::Context) -> Self { - Self([GoldilocksField::MINUS_ONE; 16]) - } - - #[inline(always)] - fn add_assign(&mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { - Self::add_assign_impl(self, other) - } - - #[inline(always)] - fn sub_assign(&'_ mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { - Self::sub_assign_impl(self, other) - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn mul_assign(&'_ mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { - Self::mul_assign_impl(self, other) - } - - #[inline(always)] - fn square(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { - let t = *self; - self.mul_assign(&t, _ctx); - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn negate(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { - let mut a_u64 = Self::as_u64x4_arrays(self); - - for i in 0..4 { - let a = a_u64.0[i]; - - let is_zero = a.eq(packed_simd::u64x4::splat(0)); - let neg = packed_simd::u64x4::splat(Self::ORDER).sub(a); - let res = is_zero.select(a, neg); - - a_u64.0[i] = res; - } - - unsafe { - *self = Self::from_u64x4_arrays(a_u64); - } - - self - } - - #[inline(always)] - fn double(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { - let t = *self; - self.add_assign(&t, _ctx); - - self - } - - #[inline(always)] - #[unroll::unroll_for_loops] - fn inverse(&self, _ctx: &mut Self::Context) -> Self { - let mut result = *self; - for i in 0..16 { - result.0[i] = PrimeField::inverse(&result.0[i]).expect("inverse must exist"); - } - - result - } - - #[inline(always)] - fn constant(value: Self::Base, _ctx: &mut Self::Context) -> Self { - Self([value; 16]) - } -} - -impl crate::field::traits::field_like::PrimeFieldLikeVectorized for MixedGL { - type Twiddles = Vec; - type InverseTwiddles = Vec; - #[inline(always)] - fn is_zero(&self) -> bool { - self.0 == [GoldilocksField::ZERO; 16] - } - - #[inline(always)] - fn equals(&self, other: &Self) -> bool { - self.eq(other) - } - - #[inline(always)] - fn mul_all_by_base(&'_ mut self, other: &Self::Base, _ctx: &mut Self::Context) -> &'_ mut Self { - Self::mul_constant_assign(self, other) - } - - #[inline(always)] - fn slice_from_base_slice(input: &[Self::Base]) -> &[Self] { - if input.len() < Self::SIZE_FACTOR { - panic!("too small input size to cast"); - } - debug_assert!(input.len() % Self::SIZE_FACTOR == 0); - debug_assert!(input.as_ptr().addr() % std::mem::align_of::() == 0); - let result_len = input.len() / 16; - unsafe { std::slice::from_raw_parts(input.as_ptr() as *mut Self, result_len) } - } - - #[inline(always)] - fn slice_into_base_slice(input: &[Self]) -> &[Self::Base] { - let result_len = input.len() * 16; - unsafe { std::slice::from_raw_parts(input.as_ptr() as *mut GoldilocksField, result_len) } - } - - #[inline(always)] - fn slice_into_base_slice_mut(input: &mut [Self]) -> &mut [Self::Base] { - let result_len = input.len() * 16; - unsafe { - std::slice::from_raw_parts_mut(input.as_ptr() as *mut GoldilocksField, result_len) - } - } - - #[inline(always)] - fn vec_from_base_vec(input: Vec) -> Vec { - if input.len() < Self::SIZE_FACTOR { - panic!("too small input size to cast"); - } - let (ptr, len, capacity, allocator) = input.into_raw_parts_with_alloc(); - debug_assert!(ptr.addr() % std::mem::align_of::() == 0); - debug_assert!(len % Self::SIZE_FACTOR == 0); - debug_assert!(capacity % Self::SIZE_FACTOR == 0); - - unsafe { - Vec::from_raw_parts_in( - ptr as _, - len / Self::SIZE_FACTOR, - capacity / Self::SIZE_FACTOR, - allocator, - ) - } - } - - #[inline(always)] - fn vec_into_base_vec(input: Vec) -> Vec { - let (ptr, len, capacity, allocator) = input.into_raw_parts_with_alloc(); - - unsafe { - Vec::from_raw_parts_in( - ptr as _, - len * Self::SIZE_FACTOR, - capacity * Self::SIZE_FACTOR, - allocator, - ) - } - } - - #[inline(always)] - fn fft_natural_to_bitreversed( - input: &mut [Self], - coset: Self::Base, - twiddles: &Self::Twiddles, - _ctx: &mut Self::Context, - ) { - // let input = crate::utils::cast_check_alignment_ref_mut_unpack::(input); - // crate::fft::fft_natural_to_bitreversed_cache_friendly(input, coset, twiddles); - - crate::fft::fft_natural_to_bitreversed_mixedgl(input, coset, twiddles); - } - - #[inline(always)] - fn ifft_natural_to_natural( - input: &mut [Self], - coset: Self::Base, - twiddles: &Self::InverseTwiddles, - _ctx: &mut Self::Context, - ) { - // let input = crate::utils::cast_check_alignment_ref_mut_unpack::(input); - // crate::fft::ifft_natural_to_natural_cache_friendly(input, coset, twiddles); - - crate::fft::ifft_natural_to_natural_mixedgl(input, coset, twiddles); - } - - #[inline(always)] - fn precompute_forward_twiddles_for_fft( - fft_size: usize, - worker: &Worker, - ctx: &mut Self::Context, - ) -> Self::Twiddles { - precompute_twiddles_for_fft::( - fft_size, worker, ctx, - ) - } - - #[inline(always)] - fn precompute_inverse_twiddles_for_fft( - fft_size: usize, - worker: &Worker, - ctx: &mut Self::Context, - ) -> Self::Twiddles { - precompute_twiddles_for_fft::( - fft_size, worker, ctx, - ) - } -} - -#[cfg(test)] -mod test { - - use crate::field::goldilocks::MixedGL; - use crate::field::rand_from_rng; - use crate::field::traits::field_like::PrimeFieldLike; - use crate::field::traits::field_like::PrimeFieldLikeVectorized; - use crate::field::{goldilocks::GoldilocksField, Field}; - use crate::utils::clone_respecting_allignment; - - #[test] - fn test_mixedgl_negate() { - let mut ctx = (); - const POLY_SIZE: usize = 1 << 20; - let mut rng = rand::thread_rng(); - - // Generate random Vec - let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - - let mut ag = a.clone(); - - for aa in ag.iter_mut() { - Field::negate(aa); - } - - let mut av: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &a, - )); - - // Test over GLPS - for aa in av.iter_mut() { - aa.negate(&mut ctx); - } - - assert_eq!(MixedGL::vec_into_base_vec(av), ag); - } - - use rand::Rng; - - #[test] - fn test_mixedgl_add_assign() { - let mut ctx = (); - const POLY_SIZE: usize = 1 << 24; - let mut rng = rand::thread_rng(); - let _s = GoldilocksField(0x0000000001000000); - - // Generate random Vec - // let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - // let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - // let a: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0x0000000000000001)).collect(); - // let b: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0x0000000001000000)).collect(); - let b: Vec = (0..POLY_SIZE) - .map(|_| GoldilocksField(rng.gen_range(GoldilocksField::ORDER..u64::MAX))) - .collect(); - let a: Vec = (0..POLY_SIZE) - .map(|_| GoldilocksField(rng.gen_range(GoldilocksField::ORDER..u64::MAX))) - .collect(); - // let a: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0xfffffffff67f1442)).collect(); - // let b: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0xffffffff9c1d065d)).collect(); - - // dbg!(&a); - // dbg!(&b); - - let mut ag = a.clone(); - let bg = b.clone(); - - for (aa, bb) in ag.iter_mut().zip(bg.iter()) { - Field::add_assign(aa, bb); - } - - let mut av: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &a, - )); - let bv: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &b, - )); - - // Test over GLPS - for (aa, bb) in av.iter_mut().zip(bv.iter()) { - aa.add_assign(bb, &mut ctx); - } - - let avv = MixedGL::vec_into_base_vec(av); - // for i in 0..avv.len() { - // assert_eq!(avv[i], ag[i], "error {}", i); - // } - - // dbg!(&ag[0]); - // dbg!(&avv[0]); - - assert_eq!(avv, ag); - } - - #[test] - fn test_mixedgl_sub_assign() { - let mut ctx = (); - const POLY_SIZE: usize = 1 << 20; - let _rng = rand::thread_rng(); - - // Generate random Vec - // let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - // let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - let a: Vec = (0..POLY_SIZE) - .map(|_| GoldilocksField(0x0000000000000001)) - .collect(); - let b: Vec = (0..POLY_SIZE) - .map(|_| GoldilocksField(0x0000000001000000)) - .collect(); - - // Test over Goldilocks - let mut ag = a.clone(); - let bg = b.clone(); - - for (aa, bb) in ag.iter_mut().zip(bg.iter()) { - Field::sub_assign(aa, bb); - } - - let mut av: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &a, - )); - let bv: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &b, - )); - - // Test over GLPS - for (aa, bb) in av.iter_mut().zip(bv.iter()) { - aa.sub_assign(bb, &mut ctx); - } - - // dbg!(&ag); - // dbg!(&av); - - assert_eq!(ag, MixedGL::vec_into_base_vec(av)); - } - - #[test] - fn test_mixedgl_mul_assign() { - let mut ctx = (); - const POLY_SIZE: usize = 1 << 20; - let mut rng = rand::thread_rng(); - - // Generate random Vec - let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); - - // Test over Goldilocks - let mut ag = a.clone(); - let bg = b.clone(); - - for (aa, bb) in ag.iter_mut().zip(bg.iter()) { - Field::mul_assign(aa, bb); - } - - let mut av: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &a, - )); - let bv: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &b, - )); - - // Test over GLPS - for (aa, bb) in av.iter_mut().zip(bv.iter()) { - aa.mul_assign(bb, &mut ctx); - } - - // dbg!(&ag); - // dbg!(&av); - - assert_eq!(ag, MixedGL::vec_into_base_vec(av)); - } - - #[test] - fn test_mixedgl_butterfly16x16() { - // let mut ctx = (); - - // let am: [u64;32] = [0x0001000000000000, 0x0000000000000001, 0x0001000000000000, 0x0000000000000001, 0x0000000000000000, 0xffffffff00000000, 0x0000000000000001, 0x0000ffffffffffff, 0x0000000000000000, 0x0001000000000000, 0xffffffff00000000, 0xffffffff00000000, 0xffffffff00000000, 0xfffeffff00000001, 0xfffeffff00000002, 0xfffeffff00000002, - // 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0001000000000001, 0xfffeffff00000001, 0xffffffff00000000, 0x0001000000000000, 0xfffeffff00000002, 0x0000000000000000, 0xfffeffff00000001, 0xffffffff00000000, 0x0000000000000001, 0x0000ffffffffffff, 0x0000000000000000, 0x0000000000000001, 0x0001000000000000]; - - let am: [u64; 32] = [ - 0x0001000000000000, - 0x0000000000000001, - 0x0001000000000000, - 0x0000000000000001, - 0x0000000000000000, - 0xffffffff00000000, - 0x0000000000000001, - 0x0000ffffffffffff, - 0x0000000000000000, - 0x0001000000000000, - 0xffffffff00000000, - 0xffffffff00000000, - 0xffffffff00000000, - 0xfffeffff00000001, - 0xfffeffff00000002, - 0xfffeffff00000002, - 0x0000000000000000, - 0xffffffff01000001, - 0x0000000000000000, - 0x0000010000ffff00, - 0xfffffeff00000101, - 0xfffffffeff000001, - 0x000000ffffffff00, - 0xfffffeff01000101, - 0x0000000000000000, - 0xfffffeff00000101, - 0xfffffffeff000001, - 0xffffffff01000001, - 0x000000fffeffff00, - 0x0000000000000000, - 0xffffffff01000001, - 0x000000ffffffff00, - ]; - - let a: Vec = am.into_iter().map(GoldilocksField).collect(); - // let b: Vec = bm.into_iter().map(GoldilocksField).collect(); - let _s = GoldilocksField(0x0000000001000000); - - // Test over Goldilocks - let mut ag = a.clone(); - // let mut bg = b.clone(); - let distance_in_cache = 16; - - let mut j = 0; - while j < 16 { - let mut u = ag[j]; - let v = ag[j + distance_in_cache]; - // Field::mul_assign(&mut v, &s); - Field::sub_assign(&mut u, &v); - ag[j + distance_in_cache] = u; - Field::add_assign(&mut ag[j], &v); - - j += 1; - } - - let av: Vec = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &a, - )); - // let mut bv: Vec = MixedGL::vec_from_base_vec(clone_respecting_allignment::(&b)); - // let mut av = av[0]; - // let mut bv = bv[0]; - - // Test over MixedGL - // av[1].mul_constant_assign(&s); - unsafe { - MixedGL::butterfly_16x16_impl( - av[0].0.as_ptr() as *mut u64, - av[1].0.as_ptr() as *mut u64, - ); - } - // let mut u = av[0]; - // let mut v = av[1]; - // unsafe { MixedGL::butterfly_16x16_impl(&mut u, &mut v); } - // av[0] = u; - // av[1] = v; - - let ag = - MixedGL::vec_from_base_vec(clone_respecting_allignment::( - &ag, - )); - // let bg = MixedGL::vec_from_base_vec(clone_respecting_allignment::(&bg)); - - dbg!(&ag); - dbg!(&av); - - // dbg!(&bg); - // dbg!(&bv); - - assert_eq!(ag, av); - // assert_eq!(bg, bv); - } -} diff --git a/src/field/goldilocks/mod.rs b/src/field/goldilocks/mod.rs index 82fa6be..26f5382 100644 --- a/src/field/goldilocks/mod.rs +++ b/src/field/goldilocks/mod.rs @@ -12,18 +12,11 @@ mod extension; mod inversion; #[cfg(all( - not(feature = "include_packed_simd"), any(target_feature = "neon", target_feature = "avx2"), not(all(target_feature = "avx512f", target_feature = "avx512vl")) ))] pub mod arm_asm_impl; -#[cfg(all( - feature = "include_packed_simd", - any(target_feature = "neon", target_feature = "avx2"), - not(all(target_feature = "avx512f", target_feature = "avx512vl")) -))] -pub mod arm_asm_packed_impl; #[cfg(not(any( all(target_feature = "avx512f", target_feature = "avx512vl"), target_feature = "neon", @@ -51,19 +44,11 @@ pub mod x86_64_asm_impl; pub mod avx512_impl; #[cfg(all( - not(feature = "include_packed_simd"), any(target_feature = "neon", target_feature = "avx2"), not(all(target_feature = "avx512f", target_feature = "avx512vl")) ))] pub use arm_asm_impl::*; -#[cfg(all( - feature = "include_packed_simd", - any(target_feature = "neon", target_feature = "avx2"), - not(all(target_feature = "avx512f", target_feature = "avx512vl")) -))] -pub use arm_asm_packed_impl::*; - #[cfg(not(any( all(target_feature = "avx512f", target_feature = "avx512vl"), target_feature = "neon", diff --git a/src/implementations/poseidon2/mod.rs b/src/implementations/poseidon2/mod.rs index 28605cb..b3df999 100644 --- a/src/implementations/poseidon2/mod.rs +++ b/src/implementations/poseidon2/mod.rs @@ -4,50 +4,15 @@ use crate::field::goldilocks::GoldilocksField; pub mod params; pub mod state_generic_impl; -#[cfg(not(any( - all( - target_feature = "avx512bw", - target_feature = "avx512cd", - target_feature = "avx512dq", - target_feature = "avx512f", - target_feature = "avx512vl", - ), - all( - feature = "include_packed_simd", - any(target_feature = "neon", target_feature = "avx2") - ) +#[cfg(not(all( + target_feature = "avx512bw", + target_feature = "avx512cd", + target_feature = "avx512dq", + target_feature = "avx512f", + target_feature = "avx512vl", )))] pub use state_generic_impl::*; -// Other poseidon implementations depend on packed_simd 128 -// which is no longer available in std::simd (and packed_simd is no longer -// supported in the newest rust nightly). -#[cfg(all( - feature = "include_packed_simd", - any(target_feature = "neon", target_feature = "avx2"), - not(any( - target_feature = "avx512bw", - target_feature = "avx512cd", - target_feature = "avx512dq", - target_feature = "avx512f", - target_feature = "avx512vl" - )) -))] -pub mod state_vectorized_double; - -#[cfg(all( - feature = "include_packed_simd", - any(target_feature = "neon", target_feature = "avx2"), - not(any( - target_feature = "avx512bw", - target_feature = "avx512cd", - target_feature = "avx512dq", - target_feature = "avx512f", - target_feature = "avx512vl" - )) -))] -pub use state_vectorized_double::*; - #[cfg(all( target_feature = "avx512bw", target_feature = "avx512cd", diff --git a/src/implementations/poseidon2/state_generic_impl.rs b/src/implementations/poseidon2/state_generic_impl.rs index c9b74e8..1d067a9 100644 --- a/src/implementations/poseidon2/state_generic_impl.rs +++ b/src/implementations/poseidon2/state_generic_impl.rs @@ -29,10 +29,6 @@ impl State { pub const T: u64 = (Self::ORDER - 1) >> Self::TWO_ADICITY; pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000 pub const EPSILON: u64 = (1 << 32) - 1; - #[cfg(feature = "include_packed_simd")] - pub const EPSILON_VECTOR: packed_simd::u64x4 = packed_simd::u64x4::splat(Self::EPSILON); - #[cfg(feature = "include_packed_simd")] - pub const EPSILON_VECTOR_D: packed_simd::u64x8 = packed_simd::u64x8::splat(Self::EPSILON); pub const RATE: usize = poseidon_goldilocks_params::RATE; pub const CAPACITY: usize = poseidon_goldilocks_params::CAPACITY; diff --git a/src/implementations/poseidon2/state_vectorized_double.rs b/src/implementations/poseidon2/state_vectorized_double.rs deleted file mode 100644 index 94e8aae..0000000 --- a/src/implementations/poseidon2/state_vectorized_double.rs +++ /dev/null @@ -1,415 +0,0 @@ -//! A vectorized implementation of the poseidon2 state. -use crate::field::Field; -use std::ops::{Add, Mul, Shl}; -use std::usize; -use unroll::unroll_for_loops; - -use crate::field::goldilocks::GoldilocksField; -use crate::field::traits::representation::U64Representable; - -use super::poseidon_goldilocks_params; - -#[derive(Default, PartialEq, Eq, Hash, Clone, Copy)] -#[repr(C, align(64))] -pub struct State(pub [u128; 12]); - -// we also need holder for SIMD targets, because u64x4 has smaller alignment than u64x8 -#[derive(Clone, Copy)] -#[repr(C, align(64))] -struct U128x4Holder([packed_simd::u128x4; 3]); - -impl std::fmt::Debug for State { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} - -impl std::fmt::Display for State { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } -} - -impl State { - pub const ORDER_BITS: usize = GoldilocksField::ORDER_BITS; - pub const ORDER: u128 = GoldilocksField::ORDER as u128; - pub const TWO_ADICITY: usize = GoldilocksField::TWO_ADICITY; - pub const T: u128 = (Self::ORDER - 1) >> Self::TWO_ADICITY; - pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000 - pub const EPSILON: u128 = (1 << 32) - 1; - pub const EPSILON_VECTOR: packed_simd::u128x4 = packed_simd::u128x4::splat(Self::EPSILON); - - pub const RATE: usize = poseidon_goldilocks_params::RATE; - pub const CAPACITY: usize = poseidon_goldilocks_params::CAPACITY; - pub const STATE_WIDTH: usize = poseidon_goldilocks_params::STATE_WIDTH; - pub const HALF_NUM_FULL_ROUNDS: usize = poseidon_goldilocks_params::HALF_NUM_FULL_ROUNDS; - pub const NUM_FULL_ROUNDS_TOTAL: usize = poseidon_goldilocks_params::NUM_FULL_ROUNDS_TOTAL; - pub const NUM_PARTIAL_ROUNDS: usize = poseidon_goldilocks_params::NUM_PARTIAL_ROUNDS; - pub const TOTAL_NUM_ROUNDS: usize = poseidon_goldilocks_params::TOTAL_NUM_ROUNDS; - pub const ALL_ROUND_CONSTANTS: [Self; Self::TOTAL_NUM_ROUNDS] = const { - let mut constants_array = [Self([0u128; Self::STATE_WIDTH]); Self::TOTAL_NUM_ROUNDS]; - let mut i = 0; - while i < Self::TOTAL_NUM_ROUNDS { - let mut t = [0u128; 12]; - let mut j = 0; - while j < 12 { - t[j] = poseidon_goldilocks_params::ALL_ROUND_CONSTANTS[i * Self::STATE_WIDTH + j] - as u128; - j += 1; - } - constants_array[i] = Self(t); - i += 1; - } - constants_array - }; - - pub const ALL_INNER_ROUND_CONSTANTS: [u128; Self::TOTAL_NUM_ROUNDS] = const { - let mut constants_array = [0u128; Self::TOTAL_NUM_ROUNDS]; - let mut i = 0; - while i < Self::TOTAL_NUM_ROUNDS { - constants_array[i] = - poseidon_goldilocks_params::ALL_ROUND_CONSTANTS[i * Self::STATE_WIDTH] as u128; - i += 1; - } - constants_array - }; - - pub const M_I_DIAGONAL_ELEMENTS_POWS: [packed_simd::u128x4; 3] = [ - packed_simd::u128x4::new(4, 14, 11, 8), - packed_simd::u128x4::new(0, 5, 2, 9), - packed_simd::u128x4::new(13, 6, 3, 12), - ]; - - pub const M_I_DIAGONAL_ELEMENTS: [packed_simd::u128x4; 3] = [ - packed_simd::u128x4::new(1 << 4, 1 << 14, 1 << 11, 1 << 8), - packed_simd::u128x4::new(1 << 0, 1 << 5, 1 << 2, 1 << 9), - packed_simd::u128x4::new(1 << 13, 1 << 6, 1 << 3, 1 << 12), - ]; - - #[inline(always)] - pub fn new() -> Self { - Self([0u128; 12]) - } - - #[inline(always)] - pub const fn from_u128_array(value: [u128; 12]) -> Self { - Self(value) - } - - #[inline(always)] - #[unroll_for_loops] - pub fn to_reduced(&mut self) -> &mut Self { - let mut a_u64 = Self::as_u128x4_arrays(self); - - for i in 0..3 { - let a = a_u64.0[i]; - let a_reduced = a.add(Self::EPSILON_VECTOR); - let cmp = a_reduced.lt(Self::EPSILON_VECTOR); - let res = cmp.select(a_reduced, a); - - a_u64.0[i] = res; - } - - *self = Self::from_u128x4_arrays(a_u64); - self - } - - #[inline(always)] - #[unroll_for_loops] - fn mul_assign_impl_without_prereduction(&mut self, other: &Self) -> &mut Self { - for i in 0..12 { - let c = self.0[i] * other.0[i]; - self.0[i] = GoldilocksField::from_u128_with_reduction(c).as_u64() as u128; - } - - self - } - - #[inline(always)] - pub fn from_field_array(input: [GoldilocksField; 12]) -> Self { - let mut d = Self::new(); - for i in 0..12 { - d.0[i] = input[i].as_u64() as u128; - } - d - } - - #[inline(always)] - pub fn as_field_array(self) -> [GoldilocksField; 12] { - let mut d = [GoldilocksField::ZERO; 12]; - for i in 0..12 { - d[i] = GoldilocksField::from_u128_with_reduction(self.0[i]); - } - d - } - - #[inline(always)] - fn as_u128x4_arrays(input: &Self) -> U128x4Holder { - // this preserves an alignment - unsafe { std::mem::transmute(*input) } - } - - #[inline(always)] - fn from_u128x4_arrays(input: U128x4Holder) -> Self { - // this preserves an alignment - unsafe { std::mem::transmute(input) } - } - - //vectorized mds_mul - #[inline(always)] - #[unroll_for_loops] - pub fn suggested_mds_mul(&mut self) { - //do we need them permanently permuted? - let x0 = packed_simd::u128x4::new(self.0[0], self.0[4], self.0[8], 0u128); - let x1 = packed_simd::u128x4::new(self.0[1], self.0[5], self.0[9], 0u128); - let x2 = packed_simd::u128x4::new(self.0[2], self.0[6], self.0[10], 0u128); - let x3 = packed_simd::u128x4::new(self.0[3], self.0[7], self.0[11], 0u128); - - let t0 = x0.add(x1); - let t1 = x2.add(x3); - let x1d = x1.shl(1); - let x3d = x3.shl(1); - let t2 = x1d.add(t1); - let t3 = x3d.add(t0); - let t0q = t0.shl(2); - let t1q = t1.shl(2); - let t4 = t1q.add(t3); - let t5 = t0q.add(t2); - let t6 = t3.add(t5); - let t7 = t2.add(t4); - - let y0 = t6.add(t6.wrapping_sum()); - let y1 = t5.add(t5.wrapping_sum()); - let y2 = t7.add(t7.wrapping_sum()); - let y3 = t4.add(t4.wrapping_sum()); - - let mut y = Self::new(); - for i in 0..3 { - y.0[i * 4] = y0.extract(i); - y.0[i * 4 + 1] = y1.extract(i); - y.0[i * 4 + 2] = y2.extract(i); - y.0[i * 4 + 3] = y3.extract(i); - } - - *self = y; - } - - #[inline(always)] - #[unroll_for_loops] - pub fn apply_round_constants(&mut self, round: usize) { - let const_current = Self::ALL_ROUND_CONSTANTS[round]; - let const_u64 = Self::as_u128x4_arrays(&const_current); - let mut state_u64 = Self::as_u128x4_arrays(self); - for i in 0..3 { - state_u64.0[i] = state_u64.0[i].add(const_u64.0[i]); - } - *self = Self::from_u128x4_arrays(state_u64); - } - - #[inline(always)] - #[unroll_for_loops] - pub fn apply_non_linearity(&mut self) { - for i in 0..12 { - self.0[i] = GoldilocksField::from_u128_with_reduction(self.0[i]).as_u64() as u128; - } - let mut t = *self; - self.elementwise_square(); - t.elementwise_mul_assign(&*self); - self.elementwise_square(); - self.elementwise_mul_assign(&t); - } - - #[inline(always)] - fn elementwise_mul_assign(&mut self, other: &Self) { - Self::mul_assign_impl_without_prereduction(self, other); - } - - #[inline(always)] - fn elementwise_square(&mut self) { - let t = *self; - self.elementwise_mul_assign(&t); - } - - #[inline(always)] - fn full_round(&mut self, round_counter: &mut usize) { - // add constants - self.apply_round_constants(*round_counter); - // apply non-linearity - self.apply_non_linearity(); - // multiply by MDS - self.suggested_mds_mul(); - - *round_counter += 1; - } - - #[inline(always)] - #[unroll_for_loops] - pub fn m_i_mul(&mut self) { - let mut state_u64 = Self::as_u128x4_arrays(self); - let mut rowwise_sum = 0u128; - for i in 0..3 { - rowwise_sum += state_u64.0[i].wrapping_sum(); - } - - for i in 0..3 { - state_u64.0[i] = state_u64.0[i].mul(Self::M_I_DIAGONAL_ELEMENTS[i]); - state_u64.0[i] = state_u64.0[i].add(rowwise_sum); - } - - *self = Self::from_u128x4_arrays(state_u64); - } - - #[inline(always)] - fn partial_round_poseidon2(&mut self, round_counter: &mut usize) { - // add constant - use std::ops::AddAssign; - self.0[0].add_assign(&Self::ALL_INNER_ROUND_CONSTANTS[*round_counter]); - // apply non-linearity to the single element - let mut s = GoldilocksField::from_u128_with_reduction(self.0[0]); - let mut t = s; - s.square(); - t.mul_assign(&s); - s.square(); - s.mul_assign(&t); - self.0[0] = s.as_u64() as u128; - - // multiply by MDS - self.m_i_mul(); - - *round_counter += 1; - } - - #[inline(always)] - #[unroll_for_loops] - pub fn poseidon2_permutation(&mut self) { - self.suggested_mds_mul(); - let mut round_counter = 0; - for _i in 0..4 { - self.full_round(&mut round_counter); - } - for i in 0..22 { - self.partial_round_poseidon2(&mut round_counter); - - if i % 3 == 1 { - for j in 0..12 { - self.0[j] = - GoldilocksField::from_u128_with_reduction(self.0[j]).as_u64() as u128; - } - } - } - for _i in 0..4 { - self.full_round(&mut round_counter); - } - - for i in 0..12 { - self.0[i] = GoldilocksField::from_u128_with_reduction(self.0[i]).as_u64() as u128; - } - } -} - -#[inline(always)] -pub fn poseidon2_permutation(state: &mut [GoldilocksField; State::STATE_WIDTH]) { - let mut state_vec = State::from_field_array(*state); - state_vec.poseidon2_permutation(); - *state = state_vec.as_field_array(); -} - -#[cfg(test)] -mod test { - - use crate::field::rand_from_rng; - use crate::field::{goldilocks::GoldilocksField, Field}; - use crate::implementations::poseidon2::State; - use crate::implementations::poseidon_goldilocks_naive; - use crate::implementations::suggested_mds; - - //test for apply_round_constants - #[test] - fn test_apply_round_constants() { - let mut rng = rand::thread_rng(); - let mut state = [GoldilocksField::ONE; 12]; - - for i in 0..state.len() { - state[i] = rand_from_rng(&mut rng); - } - dbg!(state); - - let mut state_ref = state; - poseidon_goldilocks_naive::apply_round_constants(&mut state_ref, 0); - - let mut state_vec = State::from_field_array(state); - state_vec.apply_round_constants(0); - - // dbg!(&state_vec); - - assert_eq!(state_ref, state_vec.as_field_array()); - } - - //test for apply_non_linearity - #[test] - fn test_apply_non_linearity() { - let mut rng = rand::thread_rng(); - let mut state = [GoldilocksField::ONE; 12]; - - for i in 0..state.len() { - state[i] = rand_from_rng(&mut rng); - } - dbg!(state); - - let mut state_ref = state; - for i in 0..12 { - poseidon_goldilocks_naive::apply_non_linearity(&mut state_ref[i]); - } - - let mut state_vec = State::from_field_array(state); - state_vec.apply_non_linearity(); - - // dbg!(&state_vec); - - assert_eq!(state_ref, state_vec.as_field_array()); - } - - //test for suggested_mds_mul - #[test] - fn test_suggested_mds_mul() { - let mut rng = rand::thread_rng(); - let mut state = [GoldilocksField::ONE; 12]; - - for i in 0..state.len() { - state[i] = rand_from_rng(&mut rng); - } - dbg!(state); - - let mut state_ref = state; - suggested_mds::suggested_mds_mul(&mut state_ref); - - let mut state_vec = State::from_field_array(state); - state_vec.suggested_mds_mul(); - - // dbg!(&state_vec); - - assert_eq!(state_ref, state_vec.as_field_array()); - } - - //test for poseidon2_permutation - #[test] - fn test_poseidon2_permutation() { - let mut rng = rand::thread_rng(); - let mut state = [GoldilocksField::ONE; 12]; - - for i in 0..state.len() { - state[i] = rand_from_rng(&mut rng); - } - - let state = [GoldilocksField(GoldilocksField::ORDER - 1); 12]; - dbg!(state); - - let mut state_ref = State::from_field_array(state); - State::poseidon2_permutation(&mut state_ref); - - let mut state_vec = State::from_field_array(state); - state_vec.poseidon2_permutation(); - - assert_eq!(state_ref, state_vec); - } -} diff --git a/src/lib.rs b/src/lib.rs index 6978fa7..a8bdc38 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ clippy::needless_pass_by_ref_mut, // Mutable references are often used indirectly (e.g. via unsafe code). clippy::int_plus_one, // Suggests less expressive code. clippy::bool_assert_comparison, // This crate prefers explicitness. + clippy::derived_hash_with_manual_eq, )] #![allow(dead_code)] #![allow(dropping_references)] // Required to explicitly show that mutable references are dropped. @@ -53,7 +54,6 @@ #![feature(vec_push_within_capacity)] #![feature(return_position_impl_trait_in_trait)] #![feature(type_changing_struct_update)] -#![cfg_attr(feature = "include_packed_simd", feature(stdsimd))] pub mod algebraic_props; pub mod config;