Skip to content

Commit

Permalink
AVX2 implementations of Barrett reduction, the NTT, and deserializati…
Browse files Browse the repository at this point in the history
…on for some parameters. (#270)

* NTT and Barrett reduce
* Inverse NTT functions
* inv_ntt_layer_1_step
* deserialize_1
* deserialize_4
* decompress -> decompress_ciphertext_coefficient for clarity
* deserialize_12
* deserialize_10
* ntt_multiply
  • Loading branch information
xvzcf authored May 13, 2024
1 parent 4b2e25e commit d6afc5f
Show file tree
Hide file tree
Showing 8 changed files with 461 additions and 580 deletions.
8 changes: 4 additions & 4 deletions libcrux-ml-kem/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ fn deserialize_then_decompress_10<Vector: Operations>(
cloop! {
for (i, bytes) in serialized.chunks_exact(20).enumerate() {
let coefficient = Vector::deserialize_10(bytes);
re.coefficients[i] = Vector::decompress::<10>(coefficient);
re.coefficients[i] = Vector::decompress_ciphertext_coefficient::<10>(coefficient);
}
}
re
Expand All @@ -234,7 +234,7 @@ fn deserialize_then_decompress_11<Vector: Operations>(
cloop! {
for (i, bytes) in serialized.chunks_exact(22).enumerate() {
let coefficient = Vector::deserialize_11(bytes);
re.coefficients[i] = Vector::decompress::<11>(coefficient);
re.coefficients[i] = Vector::decompress_ciphertext_coefficient::<11>(coefficient);
}
}

Expand Down Expand Up @@ -266,7 +266,7 @@ fn deserialize_then_decompress_4<Vector: Operations>(
cloop! {
for (i, bytes) in serialized.chunks_exact(8).enumerate() {
let coefficient = Vector::deserialize_4(bytes);
re.coefficients[i] = Vector::decompress::<4>(coefficient);
re.coefficients[i] = Vector::decompress_ciphertext_coefficient::<4>(coefficient);
}
}
re
Expand All @@ -283,7 +283,7 @@ fn deserialize_then_decompress_5<Vector: Operations>(
cloop! {
for (i, bytes) in serialized.chunks_exact(10).enumerate() {
re.coefficients[i] = Vector::deserialize_5(bytes);
re.coefficients[i] = Vector::decompress::<5>(re.coefficients[i]);
re.coefficients[i] = Vector::decompress_ciphertext_coefficient::<5>(re.coefficients[i]);
}
}
re
Expand Down
4 changes: 2 additions & 2 deletions polynomials-aarch64/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ impl Operations for SIMD128Vector {
compress::<COEFFICIENT_BITS>(v)
}

fn decompress<const COEFFICIENT_BITS: i32>(v: Self) -> Self {
decompress::<COEFFICIENT_BITS>(v)
fn decompress_ciphertext_coefficient<const COEFFICIENT_BITS: i32>(v: Self) -> Self {
decompress_ciphertext_coefficient::<COEFFICIENT_BITS>(v)
}

fn ntt_layer_1_step(a: Self, zeta1: i16, zeta2: i16, zeta3: i16, zeta4: i16) -> Self {
Expand Down
4 changes: 3 additions & 1 deletion polynomials-aarch64/src/simd128ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,9 @@ fn decompress_uint32x4_t<const COEFFICIENT_BITS: i32>(v: uint32x4_t) -> uint32x4
}

#[inline(always)]
pub(crate) fn decompress<const COEFFICIENT_BITS: i32>(mut v: SIMD128Vector) -> SIMD128Vector {
pub(crate) fn decompress_ciphertext_coefficient<const COEFFICIENT_BITS: i32>(
mut v: SIMD128Vector,
) -> SIMD128Vector {
let mask16 = _vdupq_n_u32(0xffff);
let low0 = _vandq_u32(_vreinterpretq_u32_s16(v.low), mask16);
let low1 = _vshrq_n_u32::<16>(_vreinterpretq_u32_s16(v.low));
Expand Down
14 changes: 10 additions & 4 deletions polynomials-avx2/src/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ use core::arch::x86::*;
use core::arch::x86_64::*;

#[allow(dead_code)]
fn print_m256i_as_i16s(a: __m256i, prefix: &'static str) {
pub(crate) fn print_m256i_as_i16s(a: __m256i, prefix: &'static str) {
let mut a_bytes = [0i16; 16];
unsafe { _mm256_store_si256(a_bytes.as_mut_ptr() as *mut __m256i, a) };
println!("{}: {:04x?}", prefix, a_bytes);
println!("{}: {:?}", prefix, a_bytes);
}
#[allow(dead_code)]
pub(crate) fn print_m256i_as_i32s(a: __m256i, prefix: &'static str) {
let mut a_bytes = [0i32; 8];
unsafe { _mm256_store_si256(a_bytes.as_mut_ptr() as *mut __m256i, a) };
println!("{}: {:?}", prefix, a_bytes);
}
#[allow(dead_code)]
fn print_m128i_as_i16s(a: __m128i, prefix: &'static str) {
pub(crate) fn print_m128i_as_i16s(a: __m128i, prefix: &'static str) {
let mut a_bytes = [0i16; 8];
unsafe { _mm_store_si128(a_bytes.as_mut_ptr() as *mut __m128i, a) };
println!("{}: {:?}", prefix, a_bytes);
}
#[allow(dead_code)]
fn print_m128i_as_i8s(a: __m128i, prefix: &'static str) {
pub(crate) fn print_m128i_as_i8s(a: __m128i, prefix: &'static str) {
let mut a_bytes = [0i8; 16];
unsafe { _mm_store_si128(a_bytes.as_mut_ptr() as *mut __m128i, a) };
println!("{}: {:?}", prefix, a_bytes);
Expand Down
Loading

0 comments on commit d6afc5f

Please sign in to comment.