PolyhedraZK · niconiconi · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024
diff --git a/arith/Cargo.toml b/arith/Cargo.toml
@@ -19,4 +19,8 @@ criterion.workspace = true
 name = "field"
 harness = false
 
+[[bench]]
+name = "ext_field"
+harness = false
+
 [features]
diff --git a/arith/benches/ext_field.rs b/arith/benches/ext_field.rs
@@ -0,0 +1,160 @@
+use arith::{ExtensionField, Field, GF2_128x4, M31Ext3, M31Ext3x16, GF2_128};
+use ark_std::test_rng;
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
+use tynm::type_name;
+
+/// Samples one element of `F` via `F::random_unsafe`, drawing from a fresh `test_rng()`.
+fn random_element<F: Field>() -> F {
+    F::random_unsafe(&mut test_rng())
+}
+
+pub(crate) fn bench_field<F: Field + ExtensionField>(c: &mut Criterion) { // registers five Criterion benchmarks of `F`'s extension-field operations
+    c.bench_function(
+        &format!(
+            "mul-by-base-throughput<{}> 100x times {}x ",
+            type_name::<F>(),
+            F::SIZE * 8 / F::FIELD_SIZE // presumably the number of field elements packed in one `F` - confirm units
+        ),
+        |b| {
+            b.iter_batched(
+                || { // setup closure: builds fresh inputs that are handed to the routine below
+                    (
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                    )
+                },
+                |(mut x, mut y, mut z, mut w, xx, yy, zz, ww)| { // benchmarked routine
+                    for _ in 0..25 { // 25 rounds x 4 independent chains = the "100x" in the label
+                        (x, y, z, w) = (
+                            x.mul_by_base_field(&xx),
+                            y.mul_by_base_field(&yy),
+                            z.mul_by_base_field(&zz),
+                            w.mul_by_base_field(&ww),
+                        );
+                    }
+                    (x, y, z, w) // results are returned to Criterion rather than dropped inside the routine
+                },
+                BatchSize::SmallInput,
+            )
+        },
+    );
+
+    c.bench_function(
+        &format!(
+            "mul-by-x-throughput<{}> 100x times {}x ",
+            type_name::<F>(),
+            F::SIZE * 8 / F::FIELD_SIZE // same ratio as in the label above
+        ),
+        |b| {
+            b.iter_batched(
+                || { // setup closure: four extension-field inputs, no base-field operand needed
+                    (
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                    )
+                },
+                |(mut x, mut y, mut z, mut w)| { // benchmarked routine
+                    for _ in 0..25 { // 25 rounds x 4 independent chains = 100 `mul_by_x` calls
+                        (x, y, z, w) = (x.mul_by_x(), y.mul_by_x(), z.mul_by_x(), w.mul_by_x());
+                    }
+                    (x, y, z, w)
+                },
+                BatchSize::SmallInput,
+            )
+        },
+    );
+
+    c.bench_function(
+        &format!(
+            "mul-by-base-latency<{}> 100x times {}x ",
+            type_name::<F>(),
+            F::SIZE * 8 / F::FIELD_SIZE // same ratio as in the label above
+        ),
+        |b| {
+            b.iter_batched(
+                || (random_element::<F>(), random_element::<F::BaseField>()), // setup: one accumulator, one base-field operand
+                |(mut x, xx)| { // benchmarked routine
+                    for _ in 0..100 { // a single dependent chain: every call consumes the previous result
+                        x = x.mul_by_base_field(&xx);
+                    }
+                    x
+                },
+                BatchSize::SmallInput,
+            )
+        },
+    );
+
+    c.bench_function(
+        &format!(
+            "add-by-base-throughput<{}> 100x times {}x ",
+            type_name::<F>(),
+            F::SIZE * 8 / F::FIELD_SIZE // same ratio as in the label above
+        ),
+        |b| {
+            b.iter_batched(
+                || { // setup closure: four extension-field inputs and four base-field operands
+                    (
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                        random_element::<F::BaseField>(),
+                    )
+                },
+                |(mut x, mut y, mut z, mut w, xx, yy, zz, ww)| { // benchmarked routine
+                    for _ in 0..25 { // 25 rounds x 4 independent chains = 100 additions
+                        (x, y, z, w) = (
+                            x.add_by_base_field(&xx),
+                            y.add_by_base_field(&yy),
+                            z.add_by_base_field(&zz),
+                            w.add_by_base_field(&ww),
+                        );
+                    }
+                    (x, y, z, w)
+                },
+                BatchSize::SmallInput,
+            )
+        },
+    );
+
+    c.bench_function(
+        &format!(
+            "add-by-base-latency<{}> 100x times {}x ",
+            type_name::<F>(),
+            F::SIZE * 8 / F::FIELD_SIZE // same ratio as in the label above
+        ),
+        |b| {
+            b.iter_batched(
+                || (random_element::<F>(), random_element::<F::BaseField>()), // setup: one accumulator, one base-field operand
+                |(mut x, xx)| { // benchmarked routine
+                    for _ in 0..100 { // a single dependent chain of 100 additions
+                        x = x.add_by_base_field(&xx);
+                    }
+                    x
+                },
+                BatchSize::SmallInput,
+            )
+        },
+    );
+}
+
+fn ext_by_base_benchmark(c: &mut Criterion) { // runs the `bench_field` suite once per listed field type
+    bench_field::<M31Ext3>(c);
+    bench_field::<M31Ext3x16>(c);
+    bench_field::<GF2_128>(c);
+    bench_field::<GF2_128x4>(c);
+}
+
+criterion_group!(ext_by_base_benches, ext_by_base_benchmark);
+criterion_main!(ext_by_base_benches);
diff --git a/arith/src/extension_field.rs b/arith/src/extension_field.rs
@@ -11,17 +11,19 @@ pub use m31_ext::M31Ext3;
 pub use m31_ext3x16::M31Ext3x16;
 
 /// Configurations for Extension Field over
-/// the Binomial polynomial x^DEGREE - W
+/// - either the Binomial polynomial x^DEGREE - W
+/// - or the AES polynomial x^128 + x^7 + x^2 + x + 1
 //
-// FIXME: Our binary extension field is no longer a binomial extension field
-// will fix later
-pub trait BinomialExtensionField: From<Self::BaseField> + Field + FieldSerde {
+pub trait ExtensionField: From<Self::BaseField> + Field + FieldSerde {
     /// Degree of the Extension
     const DEGREE: usize;
 
-    /// Extension Field
+    /// constant term if the extension field is represented as a binomial polynomial
     const W: u32;
 
+    /// The element x, i.e., 0 + x + 0 x^2 + 0 x^3 + ...
+    const X: Self;
+
     /// Base field for the extension
     type BaseField: Field + FieldSerde + Send;
 
@@ -30,4 +32,7 @@ pub trait BinomialExtensionField: From<Self::BaseField> + Field + FieldSerde {
 
     /// Add the extension field with the base field
     fn add_by_base_field(&self, base: &Self::BaseField) -> Self;
+
+    /// Multiply the extension field element by x, i.e., 0 + x + 0 x^2 + 0 x^3 + ...
+    fn mul_by_x(&self) -> Self;
 }
diff --git a/arith/src/extension_field/fr_ext.rs b/arith/src/extension_field/fr_ext.rs
@@ -1,13 +1,16 @@
 use halo2curves::bn256::Fr;
 
-use super::BinomialExtensionField;
+use super::ExtensionField;
 
-impl BinomialExtensionField for Fr {
+impl ExtensionField for Fr {
     const DEGREE: usize = 1;
 
     /// Extension Field over X-1 which is self
     const W: u32 = 1;
 
+    // placeholder, doesn't make sense for Fr
+    const X: Self = Fr::zero();
+
     /// Base field for the extension
     type BaseField = Self;
 
@@ -20,4 +23,9 @@ impl BinomialExtensionField for Fr {
     fn add_by_base_field(&self, base: &Self::BaseField) -> Self {
         self + base
     }
+
+    /// Not meaningful for `Fr` (this impl has `DEGREE = 1`): always panics via `unimplemented!`.
+    fn mul_by_x(&self) -> Self {
+        unimplemented!("mul_by_x for Fr doesn't make sense")
+    }
 }
diff --git a/arith/src/extension_field/gf2_128/avx.rs b/arith/src/extension_field/gf2_128/avx.rs
@@ -5,7 +5,7 @@ use std::{
     ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign},
 };
 
-use crate::{field_common, BinomialExtensionField, Field, FieldSerde, FieldSerdeResult, GF2};
+use crate::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, GF2};
 
 #[derive(Debug, Clone, Copy)]
 pub struct AVX512GF2_128 {
@@ -48,13 +48,19 @@ impl FieldSerde for AVX512GF2_128 {
 
 impl Field for AVX512GF2_128 {
     const NAME: &'static str = "Galios Field 2^128";
+
     const SIZE: usize = 128 / 8;
+
     const FIELD_SIZE: usize = 128; // in bits
 
     const ZERO: Self = AVX512GF2_128 {
         v: unsafe { std::mem::zeroed() },
     };
 
+    const ONE: Self = AVX512GF2_128 { // the first i32 of the vector holds 1, all other lanes are zero
+        v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, // SAFETY: `transmute` only compiles for equal sizes; the source is plain integer data
+    };
+
     const INV_2: Self = AVX512GF2_128 {
         v: unsafe { std::mem::zeroed() },
     }; // should not be used
@@ -141,10 +147,15 @@ impl Field for AVX512GF2_128 {
     }
 }
 
-impl BinomialExtensionField for AVX512GF2_128 {
+impl ExtensionField for AVX512GF2_128 {
     const DEGREE: usize = 128;
+
     const W: u32 = 0x87;
 
+    const X: Self = AVX512GF2_128 { // the first i32 of the vector holds 2 (only bit 1 set), all other lanes are zero
+        v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([2, 0, 0, 0]) }, // SAFETY: `transmute` only compiles for equal sizes; the source is plain integer data
+    };
+
     type BaseField = GF2;
 
     #[inline(always)]
@@ -162,6 +173,37 @@ impl BinomialExtensionField for AVX512GF2_128 {
         res.v = unsafe { _mm_xor_si128(res.v, _mm_set_epi64x(0, base.v as i64)) };
         res
     }
+
+    /// Multiplies `self` by x.
+    ///
+    /// Shifts the 128-bit value left by one bit and, when the top bit is shifted out,
+    /// XORs `0x87` (x^7 + x^2 + x + 1) into the low 64-bit lane.
+    #[inline]
+    fn mul_by_x(&self) -> Self {
+        // SAFETY: NOTE(review) - only register-to-register SIMD intrinsics are used; this
+        // assumes the build enables the CPU features they need (SSE2/SSE4.1) - confirm.
+        unsafe {
+            // Bit 63 of each 64-bit lane, moved down to bit 0 of the same lane.
+            let lane_tops = _mm_srli_epi64(self.v, 63);
+
+            // The low lane's top bit is the carry into bit 0 of the high lane ...
+            let carry_in = _mm_slli_si128(lane_tops, 8);
+            // ... and the high lane's top bit is the one that overflows past bit 127.
+            let overflow = _mm_srli_si128(lane_tops, 8);
+
+            // 128-bit shift left by one: per-lane shift plus the cross-lane carry.
+            let doubled = _mm_or_si128(_mm_slli_epi64(self.v, 1), carry_in);
+
+            // All-ones in the low lane iff the overflow bit was set. The high lane also
+            // compares equal (0 == 0), but `poly` is zero there, so the AND clears it.
+            let selector = _mm_cmpeq_epi64(overflow, _mm_set_epi64x(0, 1));
+            let poly = _mm_set_epi64x(0, 0x87);
+
+            // Fold the reduction constant in only when the overflow bit was set.
+            let reduced = _mm_xor_si128(doubled, _mm_and_si128(selector, poly));
+            Self { v: reduced }
+        }
+    }
 }
 
 impl From<GF2> for AVX512GF2_128 {