diff --git a/benchmarks/bench_ec_g1_msm_bls12_381.nim b/benchmarks/bench_ec_g1_msm_bls12_381.nim
index 135f65099..da09bd902 100644
--- a/benchmarks/bench_ec_g1_msm_bls12_381.nim
+++ b/benchmarks/bench_ec_g1_msm_bls12_381.nim
@@ -32,7 +32,7 @@ const AvailableCurves = [
 ]
 
 # const testNumPoints = [10, 100, 1000, 10000, 100000]
-const testNumPoints = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192,
+const testNumPoints = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192,
                        16384, 32768, 65536, 131072, 262144]
 
 proc main() =
diff --git a/benchmarks/bench_ec_g1_scalar_mul.nim b/benchmarks/bench_ec_g1_scalar_mul.nim
index ec941039b..04d2173b2 100644
--- a/benchmarks/bench_ec_g1_scalar_mul.nim
+++ b/benchmarks/bench_ec_g1_scalar_mul.nim
@@ -44,7 +44,7 @@ proc main() =
   separator()
   staticFor i, 0, AvailableCurves.len:
     const curve = AvailableCurves[i]
-    const bits = 64 # curve.getCurveOrderBitwidth()
+    const bits = curve.getCurveOrderBitwidth()
     scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp[curve], G1], bits, MulIters)
     scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp[curve], G1], bits, MulIters)
     separator()
diff --git a/benchmarks/bench_ec_g2_scalar_mul.nim b/benchmarks/bench_ec_g2_scalar_mul.nim
index 8b46ae2d5..16840b5bc 100644
--- a/benchmarks/bench_ec_g2_scalar_mul.nim
+++ b/benchmarks/bench_ec_g2_scalar_mul.nim
@@ -45,7 +45,7 @@ proc main() =
   separator()
   staticFor i, 0, AvailableCurves.len:
     const curve = AvailableCurves[i]
-    const bits = 64 # curve.getCurveOrderBitwidth()
+    const bits = curve.getCurveOrderBitwidth()
     scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, MulIters)
     scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, MulIters)
     separator()
diff --git a/benchmarks/bench_blssig_on_bls12_381_g2.nim b/benchmarks/bench_ethereum_bls_signatures.nim
similarity index 93%
rename from benchmarks/bench_blssig_on_bls12_381_g2.nim
rename to benchmarks/bench_ethereum_bls_signatures.nim
index 1a990d514..aec902d67 100644
--- a/benchmarks/bench_blssig_on_bls12_381_g2.nim
+++ b/benchmarks/bench_ethereum_bls_signatures.nim
@@ -9,7 +9,7 @@
 import
   # Internals
   ../constantine/[
-    blssig_pop_on_bls12381_g2,
+    ethereum_bls_signatures,
     ethereum_eip2333_bls12381_key_derivation],
   ../constantine/math/arithmetic,
   # Helpers
@@ -33,10 +33,10 @@ template bench(op: string, curve: string, iters: int, body: untyped): untyped =
 proc demoKeyGen(): tuple[seckey: SecretKey, pubkey: PublicKey] =
   # Don't do this at home, this is for benchmarking purposes
   # The RNG is NOT cryptographically secure
-  # The API for keygen is not ready in blssig_pop_on_bls12381_g2
+  # The API for keygen is not ready in ethereum_bls_signatures
   let ikm = rng.random_byte_seq(32)
   doAssert cast[ptr BigInt[255]](result.seckey.addr)[].derive_master_secretKey(ikm)
-  let ok = result.pubkey.derive_public_key(result.seckey)
+  let ok = result.pubkey.derive_pubkey(result.seckey)
   doAssert ok == cttBLS_Success
 
 proc benchDeserPubkey*(iters: int) =
@@ -44,26 +44,26 @@ proc benchDeserPubkey*(iters: int) =
   var pk_comp{.noInit.}: array[48, byte]
 
   # Serialize compressed
-  let ok = pk_comp.serialize_public_key_compressed(pk)
+  let ok = pk_comp.serialize_pubkey_compressed(pk)
   doAssert ok == cttBLS_Success
 
   var pk2{.noInit.}: PublicKey
 
   bench("Pubkey deserialization (full checks)", "BLS12_381 G1", iters):
-    let status = pk2.deserialize_public_key_compressed(pk_comp)
+    let status = pk2.deserialize_pubkey_compressed(pk_comp)
 
 proc benchDeserPubkeyUnchecked*(iters: int) =
   let (sk, pk) = demoKeyGen()
   var pk_comp{.noInit.}: array[48, byte]
 
   # Serialize compressed
-  let ok = pk_comp.serialize_public_key_compressed(pk)
+  let ok = pk_comp.serialize_pubkey_compressed(pk)
   doAssert ok == cttBLS_Success
 
   var pk2{.noInit.}: PublicKey
 
   bench("Pubkey deserialization (skip checks)", "BLS12_381 G1", iters):
-    let status = pk2.deserialize_public_key_compressed_unchecked(pk_comp)
+    let status = pk2.deserialize_pubkey_compressed_unchecked(pk_comp)
 
 proc benchDeserSig*(iters: int) =
   let (sk, pk) = demoKeyGen()
@@ -139,7 +139,7 @@ proc benchFastAggregateVerify*(numKeys, iters: int) =
     let status = sigs[i].sign(sk, msg)
     doAssert status == cttBLS_Success
 
-  aggSig.aggregate_signatures(sigs)
+  aggSig.aggregate_signatures_unstable_api(sigs)
 
   bench("BLS agg verif of 1 msg by " & $numKeys & " pubkeys", "BLS12_381", iters):
     let valid = validators.fast_aggregate_verify(msg, aggSig)
diff --git a/bindings/README.md b/bindings_generators/README.md
similarity index 100%
rename from bindings/README.md
rename to bindings_generators/README.md
diff --git a/bindings/constantine_bls12_381.nim b/bindings_generators/constantine_bls12_381.nim
similarity index 100%
rename from bindings/constantine_bls12_381.nim
rename to bindings_generators/constantine_bls12_381.nim
diff --git a/bindings/constantine_pasta.nim b/bindings_generators/constantine_pasta.nim
similarity index 100%
rename from bindings/constantine_pasta.nim
rename to bindings_generators/constantine_pasta.nim
diff --git a/bindings/gen_bindings.nim b/bindings_generators/gen_bindings.nim
similarity index 90%
rename from bindings/gen_bindings.nim
rename to bindings_generators/gen_bindings.nim
index 59e9e9df9..e23fc155f 100644
--- a/bindings/gen_bindings.nim
+++ b/bindings_generators/gen_bindings.nim
@@ -18,8 +18,11 @@ export curves, curves_primitives
 # This files provides template for C bindings generation
 
 template genBindingsField*(Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
-  
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
+
   func `ctt _ Field _ unmarshalBE`(dst: var Field, src: openarray[byte]) =
     ## Deserialize
     unmarshalBE(dst, src)
@@ -77,7 +80,7 @@ template genBindingsField*(Field: untyped) =
 
   func `ctt _ Field _ mul_in_place`(a: var Field, b: Field) =
     a *= b
-  
+
   func `ctt _ Field _ square`(r: var Field, a: Field) =
     r.square(a)
 
@@ -86,10 +89,10 @@ template genBindingsField*(Field: untyped) =
   # --------------------------------------------------------------------------------------
   func `ctt _ Field _ div2`(a: var Field) =
     a.div2()
-  
+
   func `ctt _ Field _ inv`(r: var Field, a: Field) =
     r.inv(a)
-  
+
   func `ctt _ Field _ inv_in_place`(a: var Field) =
     a.inv()
   # --------------------------------------------------------------------------------------
@@ -98,10 +101,10 @@ template genBindingsField*(Field: untyped) =
 
   func `ctt _ Field _ cswap`(a, b: var Field, ctl: SecretBool) =
     a.cswap(b, ctl)
-  
+
   func `ctt _ Field _ cset_zero`(a: var Field, ctl: SecretBool) =
     a.csetZero(ctl)
-  
+
   func `ctt _ Field _ cset_one`(a: var Field, ctl: SecretBool) =
     a.csetOne(ctl)
 
@@ -118,7 +121,10 @@ template genBindingsField*(Field: untyped) =
 
 
 template genBindingsFieldSqrt*(Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
 
   func `ctt _ Field _ is_square`(a: Field): SecretBool =
     a.isSquare()
@@ -148,7 +154,10 @@ template genBindingsFieldSqrt*(Field: untyped) =
 
 
 template genBindingsExtField*(Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
 
   # --------------------------------------------------------------------------------------
   func `ctt _ Field _ is_eq`(a, b: Field): SecretBool =
@@ -195,13 +204,13 @@ template genBindingsExtField*(Field: untyped) =
 
   func `ctt _ Field _ conj`(r: var Field, a: Field) =
     r.conj(a)
-  
+
   func `ctt _ Field _ conj_in_place`(a: var Field) =
     a.conj()
 
   func `ctt _ Field _ conjneg`(r: var Field, a: Field) =
     r.conjneg(a)
-  
+
   func `ctt _ Field _ conjneg_in_place`(a: var Field) =
     a.conjneg()
 
@@ -211,7 +220,7 @@ template genBindingsExtField*(Field: untyped) =
 
   func `ctt _ Field _ mul_in_place`(a: var Field, b: Field) =
     a *= b
-  
+
   func `ctt _ Field _ square`(r: var Field, a: Field) =
     r.square(a)
 
@@ -220,10 +229,10 @@ template genBindingsExtField*(Field: untyped) =
   # --------------------------------------------------------------------------------------
   func `ctt _ Field _ div2`(a: var Field) =
     a.div2()
-  
+
   func `ctt _ Field _ inv`(r: var Field, a: Field) =
     r.inv(a)
-  
+
   func `ctt _ Field _ inv_in_place`(a: var Field) =
     a.inv()
   # --------------------------------------------------------------------------------------
@@ -232,7 +241,7 @@ template genBindingsExtField*(Field: untyped) =
 
   func `ctt _ Field _ cset_zero`(a: var Field, ctl: SecretBool) =
     a.csetZero(ctl)
-  
+
   func `ctt _ Field _ cset_one`(a: var Field, ctl: SecretBool) =
     a.csetOne(ctl)
 
@@ -248,7 +257,10 @@ template genBindingsExtField*(Field: untyped) =
   {.pop.}
 
 template genBindingsExtFieldSqrt*(Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
 
   func `ctt _ Field _ is_square`(a: Field): SecretBool =
     a.isSquare()
@@ -262,12 +274,15 @@ template genBindingsExtFieldSqrt*(Field: untyped) =
   {.pop}
 
 template genBindings_EC_ShortW_Affine*(ECP, Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
 
   # --------------------------------------------------------------------------------------
   func `ctt _ ECP _ is_eq`(P, Q: ECP): SecretBool =
     P == Q
-  
+
   func `ctt _ ECP _ is_inf`(P: ECP): SecretBool =
     P.isInf()
 
@@ -276,7 +291,7 @@ template genBindings_EC_ShortW_Affine*(ECP, Field: untyped) =
 
   func `ctt _ ECP _ ccopy`(P: var ECP, Q: ECP, ctl: SecretBool) =
     P.ccopy(Q, ctl)
-  
+
   func `ctt _ ECP _ is_on_curve`(x, y: Field): SecretBool =
     isOnCurve(x, y, ECP.G)
 
@@ -289,12 +304,15 @@ template genBindings_EC_ShortW_Affine*(ECP, Field: untyped) =
   {.pop.}
 
 template genBindings_EC_ShortW_NonAffine*(ECP, ECP_Aff, Field: untyped) =
-  {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  when appType == "lib":
+    {.push cdecl, dynlib, exportc,  raises: [].} # No exceptions allowed
+  else:
+    {.push cdecl, exportc,  raises: [].} # No exceptions allowed
 
   # --------------------------------------------------------------------------------------
   func `ctt _ ECP _ is_eq`(P, Q: ECP): SecretBool =
     P == Q
-  
+
   func `ctt _ ECP _ is_inf`(P: ECP): SecretBool =
     P.isInf()
 
@@ -303,7 +321,7 @@ template genBindings_EC_ShortW_NonAffine*(ECP, ECP_Aff, Field: untyped) =
 
   func `ctt _ ECP _ ccopy`(P: var ECP, Q: ECP, ctl: SecretBool) =
     P.ccopy(Q, ctl)
-  
+
   func `ctt _ ECP _ neg`(P: var ECP, Q: ECP) =
     P.neg(Q)
 
@@ -327,7 +345,7 @@ template genBindings_EC_ShortW_NonAffine*(ECP, ECP_Aff, Field: untyped) =
 
   func `ctt _ ECP _ double_in_place`(P: var ECP) =
     P.double()
-  
+
   func `ctt _ ECP _ affine`(dst: var ECP_Aff, src: ECP) =
     dst.affine(src)
 
diff --git a/bindings/gen_header.nim b/bindings_generators/gen_header.nim
similarity index 89%
rename from bindings/gen_header.nim
rename to bindings_generators/gen_header.nim
index 1220312d2..e7667bc46 100644
--- a/bindings/gen_header.nim
+++ b/bindings_generators/gen_header.nim
@@ -16,14 +16,13 @@ import
 
 proc genHeaderLicense*(): string =
   """
-/*
- * Constantine
- * Copyright (c) 2018-2019    Status Research & Development GmbH
- * Copyright (c) 2020-Present Mamy André-Ratsimbazafy
- * Licensed and distributed under either of
- *   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
- *   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
- * at your option. This file may not be copied, modified, or distributed except according to those terms.
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
  */
 """
 
@@ -102,7 +101,7 @@ proc declNimMain*(libName: string): string =
   ## - the Nim runtime if seqs, strings or heap-allocated types are used,
   ##   this is the case only if Constantine is multithreaded.
   ## - runtime CPU features detection
-  ## 
+  ##
   ## Assumes library is compiled with --nimMainPrefix:ctt_{libName}_
   &"""
 
@@ -124,9 +123,9 @@ proc toCrettype(node: NimNode): string =
   node.expectKind({nnkEmpty, nnkSym})
   if node.kind == nnkEmpty:
     # align iwth secret_bool and secret_word
-    "void       "  
+    "void       "
   else:
-    TypeMap[$node] 
+    TypeMap[$node]
 
 proc toCtrivialParam(name: string, typ: NimNode): string =
   typ.expectKind({nnkVarTy, nnkSym})
@@ -181,16 +180,16 @@ macro collectBindings*(cBindingsStr: untyped, body: typed): untyped =
     for fnDef in generator:
       if fnDef.kind notin {nnkProcDef, nnkFuncDef}:
         continue
-    
+
       cBindings &= "\n"
-      # rettype name(pType0* pName0, pType1* pName1, ...);    
+      # rettype name(pType0* pName0, pType1* pName1, ...);
       cBindings &= fnDef.params[0].toCrettype()
       cBindings &= ' '
       cBindings &= $fnDef.name
       cBindings &= '('
       for i in 1 ..< fnDef.params.len:
         if i != 1: cBindings &= ", "
-        
+
         let paramDef = fnDef.params[i]
         paramDef.expectKind(nnkIdentDefs)
         let pType = paramDef[^2]
@@ -198,7 +197,7 @@ macro collectBindings*(cBindingsStr: untyped, body: typed): untyped =
         paramDef[^1].expectKind(nnkEmpty)
 
         for j in 0 ..< paramDef.len - 2:
-          if j != 0: cBindings &= ", " 
+          if j != 0: cBindings &= ", "
           var name = $paramDef[j]
           cBindings &= toCparam(name.split('`')[0], pType)
 
diff --git a/constantine.nimble b/constantine.nimble
index d2cbfd36a..cbb6f5065 100644
--- a/constantine.nimble
+++ b/constantine.nimble
@@ -7,7 +7,185 @@ license       = "MIT or Apache License 2.0"
 # Dependencies
 # ----------------------------------------------------------------
 
-requires "nim >= 1.1.0"
+requires "nim >= 1.6.12"
+
+# Nimscript imports
+# ----------------------------------------------------------------
+
+import std/strformat
+
+# Library compilation
+# ----------------------------------------------------------------
+
+proc releaseBuildOptions: string =
+  # -d:danger --opt:size
+  #           to avoid boundsCheck and overflowChecks that would trigger exceptions or allocations in a crypto library.
+  #           Those are internally guaranteed at compile-time by fixed-sized array
+  #           and checked at runtime with an appropriate error code if any for user-input.
+  #
+  #           Furthermore we optimize for size, the performance critical procedures
+  #           either use assembly or are unrolled manually with staticFor,
+  #           Optimizations at -O3 deal with loops and branching
+  #           which we mostly don't have. It's better to optimize
+  #           for instructions cache.
+  #
+  # --panics:on -d:noSignalHandler
+  #           Even with `raises: []`, Nim still has an exception path
+  #           for defects, for example array out-of-bound accesses (though deactivated with -d:danger)
+  #           This turns them into panics, removing exceptions from the library.
+  #           We also remove signal handlers as it's not our business.
+  #
+  # --mm:arc -d:useMalloc
+  #           Constantine stack allocates everything (except for multithreading).
+  #           Inputs are through unmanaged ptr+len. So we don't want any runtime.
+  #           Combined with -d:useMalloc, sanitizers and valgrind work as in C,
+  #           even for test cases that needs to allocate (json inputs).
+  #
+  # -fno-semantic-interposition
+  #           https://fedoraproject.org/wiki/Changes/PythonNoSemanticInterpositionSpeedup
+  #           Default in Clang, not default in GCC, prevents optimizations, not portable to non-Linux.
+  #           Also disabling this prevents overriding symbols which might actually be wanted in a cryptographic library
+  #
+  # -falign-functions=64
+  #           Reduce instructions cache misses.
+  #           https://lkml.org/lkml/2015/5/21/443
+  #           Our non-inlined functions are large so size cost is minimal.
+  " -d:danger --opt:size " &
+  " --panics:on -d:noSignalHandler " &
+  " --mm:arc -d:useMalloc " &
+  " --verbosity:0 --hints:off --warnings:off " &
+  # " --passC:-flto --passL:-flto " &
+  " --passC:-fno-semantic-interposition " &
+  " --passC:-falign-functions=64 "
+
+type BindingsKind = enum
+  kCurve
+  kProtocol
+
+proc genDynamicBindings(bindingsKind: BindingsKind, bindingsName, prefixNimMain: string) =
+  proc compile(libName: string, flags = "") =
+    echo "Compiling dynamic library: lib/" & libName
+    exec "nim c " &
+         " --noMain --app:lib " &
+         flags &
+         releaseBuildOptions() &
+         &" --nimMainPrefix:{prefixNimMain} " &
+         &" --out:{libName} --outdir:lib " &
+         (block:
+           case bindingsKind
+           of kCurve:
+             &" --nimcache:nimcache/bindings_curves/{bindingsName}" &
+             &" bindings_generators/{bindingsName}.nim"
+           of kProtocol:
+             &" --nimcache:nimcache/bindings_protocols/{bindingsName}" &
+             &" constantine/{bindingsName}.nim")
+
+  let bindingsName = block:
+    case bindingsKind
+    of kCurve: bindingsName
+    of kProtocol: "constantine_" & bindingsName
+
+  when defined(windows):
+    compile bindingsName & ".dll"
+
+  elif defined(macosx):
+    compile "lib" & bindingsName & ".dylib.arm", "--cpu:arm64 -l:'-target arm64-apple-macos11' -t:'-target arm64-apple-macos11'"
+    compile "lib" & bindingsName & ".dylib.x64", "--cpu:amd64 -l:'-target x86_64-apple-macos10.12' -t:'-target x86_64-apple-macos10.12'"
+    exec "lipo lib/lib" & bindingsName & ".dylib.arm " &
+             " lib/lib" & bindingsName & ".dylib.x64 " &
+             " -output lib/lib" & bindingsName & ".dylib -create"
+
+  else:
+    compile "lib" & bindingsName & ".so"
+
+proc genStaticBindings(bindingsKind: BindingsKind, bindingsName, prefixNimMain: string) =
+  proc compile(libName: string, flags = "") =
+    echo "Compiling static library:  lib/" & libName
+    exec "nim c " &
+         " --noMain --app:staticLib " &
+         flags &
+         releaseBuildOptions() &
+         " --nimMainPrefix:" & prefixNimMain &
+         " --out:" & libName & " --outdir:lib " &
+         (block:
+           case bindingsKind
+           of kCurve:
+             " --nimcache:nimcache/bindings_curves/" & bindingsName &
+             " bindings_generators/" & bindingsName & ".nim"
+           of kProtocol:
+             " --nimcache:nimcache/bindings_protocols/" & bindingsName &
+             " constantine/" & bindingsName & ".nim"
+         )
+
+  let bindingsName = block:
+    case bindingsKind
+    of kCurve: bindingsName
+    of kProtocol: "constantine_" & bindingsName
+
+  when defined(windows):
+    compile bindingsName & ".lib"
+
+  elif defined(macosx):
+    compile "lib" & bindingsName & ".a.arm", "--cpu:arm64 -l:'-target arm64-apple-macos11' -t:'-target arm64-apple-macos11'"
+    compile "lib" & bindingsName & ".a.x64", "--cpu:amd64 -l:'-target x86_64-apple-macos10.12' -t:'-target x86_64-apple-macos10.12'"
+    exec "lipo lib/lib" & bindingsName & ".a.arm " &
+             " lib/lib" & bindingsName & ".a.x64 " &
+             " -output lib/lib" & bindingsName & ".a -create"
+
+  else:
+    compile "lib" & bindingsName & ".a"
+
+proc genHeaders(bindingsName: string) =
+  echo "Generating header:         include/" & bindingsName & ".h"
+  exec "nim c -d:CttGenerateHeaders " &
+       releaseBuildOptions() &
+       " --out:" & bindingsName & "_gen_header.exe --outdir:build " &
+       " --nimcache:nimcache/bindings_curves_headers/" & bindingsName & "_header" &
+       " bindings_generators/" & bindingsName & ".nim"
+  exec "build/" & bindingsName & "_gen_header.exe include"
+
+task bindings, "Generate Constantine bindings":
+  # Curve arithmetic
+  genStaticBindings(kCurve, "constantine_bls12_381", "ctt_bls12381_init_")
+  genDynamicBindings(kCurve, "constantine_bls12_381", "ctt_bls12381_init_")
+  genHeaders("constantine_bls12_381")
+  echo ""
+  genStaticBindings(kCurve, "constantine_pasta", "ctt_pasta_init_")
+  genDynamicBindings(kCurve, "constantine_pasta", "ctt_pasta_init_")
+  genHeaders("constantine_pasta")
+  echo ""
+
+  # Protocols
+  genStaticBindings(kProtocol, "ethereum_bls_signatures", "ctt_eth_bls_init_")
+  genDynamicBindings(kProtocol, "ethereum_bls_signatures", "ctt_eth_bls_init_")
+
+proc testLib(path, testName, libName: string, useGMP: bool) =
+  let dynlibName = if defined(windows): libName & ".dll"
+                   elif defined(macosx): "lib" & libName & ".dylib"
+                   else: "lib" & libName & ".so"
+  let staticlibName = if defined(windows): libName & ".lib"
+                      else: "lib" & libName & ".a"
+
+  echo &"\n[Bindings: {path}/{testName}.c] Testing dynamically linked library {dynlibName}"
+  exec &"gcc -Iinclude -Llib -o build/testbindings/{testName}_dynlink.exe {path}/{testName}.c -l{libName} " & (if useGMP: "-lgmp" else: "")
+  when defined(windows):
+    # Put DLL near the exe as LD_LIBRARY_PATH doesn't work even in a POSIX compatible shell
+    exec &"./build/testbindings/{testName}_dynlink.exe"
+  else:
+    exec &"LD_LIBRARY_PATH=lib ./build/testbindings/{testName}_dynlink.exe"
+
+
+  echo &"\n[Bindings: {path}/{testName}.c] Testing statically linked library: {staticlibName}"
+  # Beware MacOS annoying linker with regards to static libraries
+  # The following standard way cannot be used on MacOS
+  # exec "gcc -Iinclude -Llib -o build/t_libctt_bls12_381_sl.exe examples_c/t_libctt_bls12_381.c -lgmp -Wl,-Bstatic -lconstantine_bls12_381 -Wl,-Bdynamic"
+  exec &"gcc -Iinclude -o build/testbindings/{testName}_staticlink.exe {path}/{testName}.c lib/{staticlibName} " & (if useGMP: "-lgmp" else: "")
+  exec &"./build/testbindings/{testName}_staticlink.exe"
+
+task test_bindings, "Test C bindings":
+  mkDir "build/testbindings" # NimScript builtin instead of shelling out to POSIX-only `mkdir -p`
+  testLib("examples_c", "t_libctt_bls12_381", "constantine_bls12_381", useGMP = true)
+  testLib("examples_c", "ethereum_bls_signatures", "constantine_ethereum_bls_signatures", useGMP = false)
 
 # Test config
 # ----------------------------------------------------------------
@@ -232,7 +410,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   # Protocols
   # ----------------------------------------------------------
   ("tests/t_ethereum_evm_precompiles.nim", false),
-  ("tests/t_blssig_pop_on_bls12381_g2.nim", false),
+  ("tests/t_ethereum_bls_signatures.nim", false),
   ("tests/t_ethereum_eip2333_bls12381_key_derivation.nim", false),
 ]
 
@@ -291,7 +469,7 @@ const benchDesc = [
   "bench_poly1305",
   "bench_sha256",
   "bench_hash_to_curve",
-  "bench_blssig_on_bls12_381_g2"
+  "bench_ethereum_bls_signatures"
 ]
 
 # For temporary (hopefully) investigation that can only be reproduced in CI
@@ -300,22 +478,9 @@ const useDebug = [
   "tests/math/t_hash_sha256_vs_openssl.nim",
 ]
 
-# Tests that uses sequences require Nim GC, stack scanning and nil pointer passed to openarray
-# In particular the tests that uses the json test vectors, don't sanitize them.
-# we do use gc:none to help
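+# Skip sanitizers for specific tests (paths are compared exactly via `notin`, not by prefix; "tests/t_" looks like a placeholder entry - TODO confirm)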
 const skipSanitizers = [
-  "tests/math/t_ec_sage_bn254_nogami.nim",
-  "tests/math/t_ec_sage_bn254_snarks.nim",
-  "tests/math/t_ec_sage_bls12_377.nim",
-  "tests/math/t_ec_sage_bls12_381.nim",
-  "tests/t_blssig_pop_on_bls12381_g2.nim",
-  "tests/t_hash_to_field.nim",
-  "tests/t_hash_to_curve.nim",
-  "tests/t_hash_to_curve_random.nim",
-  "tests/t_mac_poly1305.nim",
-  "tests/t_mac_hmac.nim",
-  "tests/t_kdf_hkdf.nim",
-  "tests/t_ethereum_eip2333_bls12381_key_derivation.nim"
+  "tests/t_"
 ]
 
 when defined(windows):
@@ -323,13 +488,19 @@ when defined(windows):
   const sanitizers = ""
 else:
   const sanitizers =
-    " --passC:-fsanitize=undefined --passL:-fsanitize=undefined" &
-    " --passC:-fno-sanitize-recover" & # Enforce crash on undefined behaviour
-    " --gc:none" # The conservative stack scanning of Nim default GC triggers, alignment UB and stack-buffer-overflow check.
-    # " --passC:-fsanitize=address --passL:-fsanitize=address" & # Requires too much stack for the inline assembly
+    # Sanitizers are incompatible with nim default GC
+    # The conservative stack scanning of Nim's default GC triggers the alignment UB and stack-buffer-overflow checks.
+    # Address sanitizer requires free registers and needs to be disabled for some inline assembly files.
+    # Ensure you use --mm:arc -d:useMalloc
+    #
+    # Sanitizers are deactivated by default as they slow down CI by at least 6x
+
+    # " --passC:-fsanitize=undefined --passL:-fsanitize=undefined" &
+    # " --passC:-fsanitize=address --passL:-fsanitize=address" &
+    " --passC:-fno-sanitize-recover" # Enforce crash on undefined behaviour
 
 
-# Helper functions
+# Tests & Benchmarks helper functions
 # ----------------------------------------------------------------
 
 proc clearParallelBuild() =
@@ -337,7 +508,7 @@ proc clearParallelBuild() =
   if fileExists(buildParallel):
     rmFile(buildParallel)
 
-template setupCommand(): untyped {.dirty.} =
+template setupTestCommand(): untyped {.dirty.} =
   var lang = "c"
   if existsEnv"TEST_LANG":
     lang = getEnv"TEST_LANG"
@@ -349,10 +520,12 @@ template setupCommand(): untyped {.dirty.} =
   var flags = flags
   when not defined(windows):
     # Not available in MinGW https://github.com/libressl-portable/portable/issues/54
-    flags &= " --passC:-fstack-protector-strong"
-  let command = "nim " & lang & cc & " -d:release " & flags &
-    " --panics:on " & # Defects are not catchable
-    " --verbosity:0 --outdir:build/testsuite -r --hints:off --warnings:off " &
+    flags &= " --passC:-fstack-protector-strong --passC:-D_FORTIFY_SOURCE=2 "
+  let command = "nim " & lang & cc &
+    " -r " &
+    flags &
+    releaseBuildOptions() &
+    " --outdir:build/testsuite " &
     " --nimcache:nimcache/" & path & " " &
     path
 
@@ -363,7 +536,7 @@ proc test(cmd: string) =
   exec cmd
 
 proc testBatch(commands: var string, flags, path: string) =
-  setupCommand()
+  setupTestCommand()
   commands &= command & '\n'
 
 template setupBench(): untyped {.dirty.} =
@@ -383,10 +556,10 @@ template setupBench(): untyped {.dirty.} =
   if not useAsm:
     cc &= " -d:CttASM=false"
   let command = "nim " & lang & cc &
-       " --panics:on " & # Defects are not catchable
-       " -d:danger --verbosity:0 -o:build/bench/" & benchName & "_" & compiler & "_" & (if useAsm: "useASM" else: "noASM") &
+       releaseBuildOptions() &
+       " -o:build/bench/" & benchName & "_" & compiler & "_" & (if useAsm: "useASM" else: "noASM") &
        " --nimcache:nimcache/benches/" & benchName & "_" & compiler & "_" & (if useAsm: "useASM" else: "noASM") &
-       runFlag & "--hints:off --warnings:off benchmarks/" & benchName & ".nim"
+       runFlag & " benchmarks/" & benchName & ".nim"
 
 proc runBench(benchName: string, compiler = "", useAsm = true) =
   if not dirExists "build":
@@ -410,11 +583,11 @@ proc addTestSet(cmdFile: var string, requireGMP: bool, test32bit = false, testAS
     if not(td.useGMP and not requireGMP):
       var flags = ""
       if not testASM:
-        flags &= " -d:CttASM=false"
+        flags &= " -d:CttASM=false "
       if test32bit:
-        flags &= " -d:Constantine32"
+        flags &= " -d:Constantine32 "
       if td.path in useDebug:
-        flags &= " -d:debugConstantine"
+        flags &= " -d:debugConstantine "
       if td.path notin skipSanitizers:
         flags &= sanitizers
 
@@ -425,8 +598,11 @@ proc addTestSetNvidia(cmdFile: var string) =
     mkDir "build"
   echo "Found " & $testDescNvidia.len & " tests to run."
 
-  for path in testDescNvidia:
-    cmdFile.testBatch(flags = "", path)
+  for path in testDescThreadpool:
+    var flags = ""
+    if path notin skipSanitizers:
+      flags &= sanitizers
+    cmdFile.testBatch(flags, path)
 
 proc addTestSetThreadpool(cmdFile: var string) =
   if not dirExists "build":
@@ -434,7 +610,10 @@ proc addTestSetThreadpool(cmdFile: var string) =
   echo "Found " & $testDescThreadpool.len & " tests to run."
 
   for path in testDescThreadpool:
-    cmdFile.testBatch(flags = "--threads:on --linetrace:on --debugger:native", path)
+    var flags = " --threads:on --debugger:native "
+    if path notin skipSanitizers:
+      flags &= sanitizers
+    cmdFile.testBatch(flags, path)
 
 proc addTestSetMultithreadedCrypto(cmdFile: var string, test32bit = false, testASM = true) =
   if not dirExists "build":
@@ -461,115 +640,12 @@ proc addBenchSet(cmdFile: var string, useAsm = true) =
   for bd in benchDesc:
     cmdFile.buildBenchBatch(bd, useASM = useASM)
 
-proc genDynamicBindings(bindingsName, prefixNimMain: string) =
-  proc compile(libName: string, flags = "") =
-    # -d:danger to avoid boundsCheck, overflowChecks that would trigger exceptions or allocations in a crypto library.
-    #           Those are internally guaranteed at compile-time by fixed-sized array
-    #           and checked at runtime with an appropriate error code if any for user-input.
-    # -gc:arc   Constantine stack allocates everything. Inputs are through unmanaged ptr+len.
-    #           In the future, Constantine might use:
-    #             - heap-allocated sequences and objects manually managed or managed by destructors for multithreading.
-    #             - heap-allocated strings for hex-string or decimal strings
-    echo "Compiling dynamic library: lib/" & libName
-    exec "nim c -f " & flags & " --noMain -d:danger --app:lib --gc:arc " &
-         " --panics:on " & # Defects are not catchable
-         " --verbosity:0 --hints:off --warnings:off " &
-         " --nimMainPrefix:" & prefixNimMain &
-         " --out:" & libName & " --outdir:lib " &
-         " --nimcache:nimcache/bindings/" & bindingsName &
-         " bindings/" & bindingsName & ".nim"
-
-  when defined(windows):
-    compile bindingsName & ".dll"
-
-  elif defined(macosx):
-    compile "lib" & bindingsName & ".dylib.arm", "--cpu:arm64 -l:'-target arm64-apple-macos11' -t:'-target arm64-apple-macos11'"
-    compile "lib" & bindingsName & ".dylib.x64", "--cpu:amd64 -l:'-target x86_64-apple-macos10.12' -t:'-target x86_64-apple-macos10.12'"
-    exec "lipo lib/lib" & bindingsName & ".dylib.arm " &
-             " lib/lib" & bindingsName & ".dylib.x64 " &
-             " -output lib/lib" & bindingsName & ".dylib -create"
-
-  else:
-    compile "lib" & bindingsName & ".so"
-
-proc genStaticBindings(bindingsName, prefixNimMain: string) =
-  proc compile(libName: string, flags = "") =
-    # -d:danger to avoid boundsCheck, overflowChecks that would trigger exceptions or allocations in a crypto library.
-    #           Those are internally guaranteed at compile-time by fixed-sized array
-    #           and checked at runtime with an appropriate error code if any for user-input.
-    # -gc:arc   Constantine stack allocates everything. Inputs are through unmanaged ptr+len.
-    #           In the future, Constantine might use:
-    #             - heap-allocated sequences and objects manually managed or managed by destructors for multithreading.
-    #             - heap-allocated strings for hex-string or decimal strings
-    echo "Compiling static library:  lib/" & libName
-    exec "nim c -f " & flags & " --noMain -d:danger --app:staticLib --gc:arc " &
-         " --panics:on " & # Defects are not catchable
-         " --verbosity:0 --hints:off --warnings:off " &
-         " --nimMainPrefix:" & prefixNimMain &
-         " --out:" & libName & " --outdir:lib " &
-         " --nimcache:nimcache/bindings/" & bindingsName &
-         " bindings/" & bindingsName & ".nim"
-
-  when defined(windows):
-    compile bindingsName & ".lib"
-
-  elif defined(macosx):
-    compile "lib" & bindingsName & ".a.arm", "--cpu:arm64 -l:'-target arm64-apple-macos11' -t:'-target arm64-apple-macos11'"
-    compile "lib" & bindingsName & ".a.x64", "--cpu:amd64 -l:'-target x86_64-apple-macos10.12' -t:'-target x86_64-apple-macos10.12'"
-    exec "lipo lib/lib" & bindingsName & ".a.arm " &
-             " lib/lib" & bindingsName & ".a.x64 " &
-             " -output lib/lib" & bindingsName & ".a -create"
-
-  else:
-    compile "lib" & bindingsName & ".a"
-
-proc genHeaders(bindingsName: string) =
-  echo "Generating header:         include/" & bindingsName & ".h"
-  exec "nim c -d:release -d:CttGenerateHeaders " &
-       " --verbosity:0 --hints:off --warnings:off " &
-       " --out:" & bindingsName & "_gen_header.exe --outdir:build " &
-       " --nimcache:nimcache/bindings/" & bindingsName & "_header" &
-       " bindings/" & bindingsName & ".nim"
-  exec "build/" & bindingsName & "_gen_header.exe include"
-
 proc genParallelCmdRunner() =
   exec "nim c --verbosity:0 --hints:off --warnings:off -d:release --out:build/pararun --nimcache:nimcache/pararun helpers/pararun.nim"
 
 # Tasks
 # ----------------------------------------------------------------
 
-task bindings, "Generate Constantine bindings":
-  genDynamicBindings("constantine_bls12_381", "ctt_bls12381_init_")
-  genStaticBindings("constantine_bls12_381", "ctt_bls12381_init_")
-  genHeaders("constantine_bls12_381")
-  echo ""
-  genDynamicBindings("constantine_pasta", "ctt_pasta_init_")
-  genStaticBindings("constantine_pasta", "ctt_pasta_init_")
-  genHeaders("constantine_pasta")
-
-task test_bindings, "Test C bindings":
-  exec "mkdir -p build/testsuite"
-  echo "--> Testing dynamically linked library"
-  when not defined(windows):
-    exec "gcc -Iinclude -Llib -o build/testsuite/t_libctt_bls12_381_dl examples_c/t_libctt_bls12_381.c -lgmp -lconstantine_bls12_381"
-    exec "LD_LIBRARY_PATH=lib ./build/testsuite/t_libctt_bls12_381_dl"
-  else:
-    # Put DLL near the exe as LD_LIBRARY_PATH doesn't work even in an POSIX compatible shell
-    exec "gcc -Iinclude -Llib -o build/testsuite/t_libctt_bls12_381_dl.exe examples_c/t_libctt_bls12_381.c -lgmp -lconstantine_bls12_381"
-    exec "./build/testsuite/t_libctt_bls12_381_dl.exe"
-
-  echo "--> Testing statically linked library"
-  when not defined(windows):
-    # Beware MacOS annoying linker with regards to static libraries
-    # The following standard way cannot be used on MacOS
-    # exec "gcc -Iinclude -Llib -o build/t_libctt_bls12_381_sl.exe examples_c/t_libctt_bls12_381.c -lgmp -Wl,-Bstatic -lconstantine_bls12_381 -Wl,-Bdynamic"
-
-    exec "gcc -Iinclude -o build/testsuite/t_libctt_bls12_381_sl examples_c/t_libctt_bls12_381.c lib/libconstantine_bls12_381.a -lgmp"
-    exec "./build/testsuite/t_libctt_bls12_381_sl"
-  else:
-    exec "gcc -Iinclude -o build/testsuite/t_libctt_bls12_381_sl.exe examples_c/t_libctt_bls12_381.c lib/constantine_bls12_381.lib -lgmp"
-    exec "./build/testsuite/t_libctt_bls12_381_sl.exe"
-
 task test, "Run all tests":
   # -d:testingCurves is configured in a *.nim.cfg for convenience
   var cmdFile: string
@@ -1123,17 +1199,17 @@ task bench_hash_to_curve_clang_noasm, "Run Hash-to-Curve benchmarks":
 
 # BLS signatures
 # ------------------------------------------
-task bench_blssig_on_bls12_381_g2, "Run Hash-to-Curve benchmarks":
-  runBench("bench_blssig_on_bls12_381_g2")
+task bench_ethereum_bls_signatures, "Run Ethereum BLS signatures benchmarks":
+  runBench("bench_ethereum_bls_signatures")
 
-task bench_blssig_on_bls12_381_g2_gcc, "Run Hash-to-Curve benchmarks":
-  runBench("bench_blssig_on_bls12_381_g2", "gcc")
+task bench_ethereum_bls_signatures_gcc, "Run Ethereum BLS signatures benchmarks":
+  runBench("bench_ethereum_bls_signatures", "gcc")
 
-task bench_blssig_on_bls12_381_g2_clang, "Run Hash-to-Curve benchmarks":
-  runBench("bench_blssig_on_bls12_381_g2", "clang")
+task bench_ethereum_bls_signatures_clang, "Run Ethereum BLS signatures benchmarks":
+  runBench("bench_ethereum_bls_signatures", "clang")
 
-task bench_blssig_on_bls12_381_g2_gcc_noasm, "Run Hash-to-Curve benchmarks":
-  runBench("bench_blssig_on_bls12_381_g2", "gcc", useAsm = false)
+task bench_ethereum_bls_signatures_gcc_noasm, "Run Ethereum BLS signatures benchmarks":
+  runBench("bench_ethereum_bls_signatures", "gcc", useAsm = false)
 
-task bench_blssig_on_bls12_381_g2_clang_noasm, "Run Hash-to-Curve benchmarks":
-  runBench("bench_blssig_on_bls12_381_g2", "clang", useAsm = false)
+task bench_ethereum_bls_signatures_clang_noasm, "Run Ethereum BLS signatures benchmarks":
+  runBench("bench_ethereum_bls_signatures", "clang", useAsm = false)
diff --git a/constantine/ciphers/chacha20.nim b/constantine/ciphers/chacha20.nim
index 8065ba74a..c26fd2ad2 100644
--- a/constantine/ciphers/chacha20.nim
+++ b/constantine/ciphers/chacha20.nim
@@ -6,7 +6,7 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ../platforms/endians
+import ../platforms/[endians, views]
 
 # ############################################################
 #
@@ -79,18 +79,18 @@ func chacha20_block(
 
   # uint32 are 4 bytes so multiply destination by 4
   for i in 0'u ..< 4:
-    key_stream.dumpRawInt(state[i] + cccc[i], i shl 2, littleEndian) 
+    key_stream.dumpRawInt(state[i] + cccc[i], i shl 2, littleEndian)
   for i in 4'u ..< 12:
     key_stream.dumpRawInt(state[i] + key[i-4], i shl 2, littleEndian)
   key_stream.dumpRawInt(state[12] + block_counter, 12 shl 2, littleEndian)
   for i in 13'u ..< 16:
     key_stream.dumpRawInt(state[i] + nonce[i-13], i shl 2, littleEndian)
 
-func chacha20_cipher*[T: byte|char](
+func chacha20_cipher*(
        key: array[32, byte],
        counter: uint32,
        nonce: array[12, byte],
-       data: var openarray[T]): uint32 =
+       data: var openArray[byte]): uint32 {.genCharAPI.} =
   ## Encrypt or decrypt `data` using the ChaCha20 cipher
   ## - `key` is a 256-bit (32 bytes) secret shared encryption/decryption key.
   ## - `counter`. A monotonically increasing value per encryption.
diff --git a/constantine/blssig_pop_on_bls12381_g2.nim b/constantine/ethereum_bls_signatures.nim
similarity index 67%
rename from constantine/blssig_pop_on_bls12381_g2.nim
rename to constantine/ethereum_bls_signatures.nim
index e1972676f..b44b4fb1a 100644
--- a/constantine/blssig_pop_on_bls12381_g2.nim
+++ b/constantine/ethereum_bls_signatures.nim
@@ -6,34 +6,15 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import
-    ./platforms/abstractions,
-    ./math/config/curves,
-    ./math/[
-      ec_shortweierstrass,
-      extension_fields,
-      arithmetic,
-      constants/zoo_subgroups
-    ],
-    ./math/io/[io_bigints, io_fields],
-    hashes,
-    signatures/bls_signatures
-
-export
-  abstractions, # generic sandwich on SecretBool and SecretBool in Jacobian sumImpl
-  curves, # generic sandwich on matchingBigInt
-  extension_fields, # generic sandwich on extension field access
-  hashes, # generic sandwich on sha256
-  ec_shortweierstrass # generic sandwich on affine
-
 ## ############################################################
 ##
-##              BLS Signatures on BLS12-381 G2
+##              BLS Signatures for Ethereum
 ##
 ## ############################################################
 ##
 ## This module implements BLS Signatures (Boneh-Lynn-Schacham)
-## on top of the BLS12-381 curve (Barreto-Lynn-Scott).
+## on top of the BLS12-381 curve (Barreto-Lynn-Scott) G2
+## for the Ethereum blockchain.
 ##
 ## Ciphersuite:
 ##
@@ -45,33 +26,83 @@ export
 ## - Domain separation tag: "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_"
 ## - Hash function: SHA256
 ##
-## Currently Constantine does not provide popProve and popVerify
-## which are thin wrapper over sign/verify with
-## - the message to sign or verify being the compressed or uncompressed public key
-##   or another application-specific "hash_pubkey_to_point" scheme
-## - domain-separation-tag: "BLS_POP_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_"
+## Specs:
+## - https://github.com/ethereum/consensus-specs/blob/v1.2.0/specs/phase0/beacon-chain.md#bls-signatures
+## - https://github.com/ethereum/consensus-specs/blob/v1.2.0/specs/altair/bls.md
+## - https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html
 ##
-## Constantine currently assumes that proof-of-possessions are handled at the application-level
+## Test vectors:
+## - https://github.com/ethereum/bls12-381-tests
 ##
-## In proof-of-stake blockchains, being part of the staker/validator sets
-## already serve as proof-of-possession.
+## The Ethereum blockchain uses the proof-of-possession scheme (PoP).
+## Each public key is associated with a deposit proof required to participate
+## in the blockchain consensus protocol, hence PopProve and PopVerify
+## as defined in the IETF spec are not needed.
 
 const DST = "BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_"
-const ffi_prefix {.used.} = "ctt_blssig_pop_on_bls12381_g2_"
+const prefix_ffi = "ctt_eth_bls_"
+
+# Dependencies exports for C FFI
+# ------------------------------------------------------------------------------------------------
+
+import ./zoo_exports
+
+static:
+  # Export SHA256 routines with a protocol specific prefix
+  # This exports sha256.init(), sha256.update(), sha256.finish() and sha256.clear()
+  prefix_sha256 = prefix_ffi & "sha256_" # prefix_ffi already ends with '_'; avoids "__" in exported C symbols
+
+import hashes
+export hashes # generic sandwich on sha256
+
+func sha256_hash*(digest: var array[32, byte], message: openArray[byte], clearMem: bool) {.libPrefix: prefix_ffi.} =
+  ## Compute the SHA-256 hash of `message`
+  ## and store the 32-byte result in `digest`.
+  ## `clearMem` is forwarded to `sha256.hash` to optionally clear the memory buffer used.
+
+  # There is an extra indirect function call as we use a generic `hash` concept but:
+  # - the indirection saves space (instead of duplicating `hash`)
+  # - it has minimal overhead compared to hashing time
+  # - it can be tail-call optimized into a goto jump instead of call/return
+  # - it can be LTO-optimized
+  sha256.hash(digest, message, clearMem)
+
+# Imports
+# ------------------------------------------------------------------------------------------------
+
+import
+    ./platforms/[abstractions, views],
+    ./math/config/curves,
+    ./math/[
+      ec_shortweierstrass,
+      extension_fields,
+      arithmetic,
+      constants/zoo_subgroups
+    ],
+    ./math/io/[io_bigints, io_fields],
+    signatures/bls_signatures
+
+export
+  abstractions, # generic sandwich on SecretBool and SecretBool in Jacobian sumImpl
+  curves, # generic sandwich on matchingBigInt
+  extension_fields, # generic sandwich on extension field access
+  ec_shortweierstrass # generic sandwich on affine
 
-{.push raises: [].} # No exceptions allowed in core cryptographic operations
-# {.push cdecl, dynlib, exportc:ffi_prefix & "$1".} # TODO, C API
+# Protocol types
+# ------------------------------------------------------------------------------------------------
+
+{.checks: off.} # No exceptions allowed in core cryptographic operations
 
 type
-  SecretKey* {.byref.} = object
+  SecretKey* {.byref, exportc: prefix_ffi & "seckey".} = object
     ## A BLS12_381 secret key
     raw: matchingOrderBigInt(BLS12_381)
 
-  PublicKey* {.byref.} = object
+  PublicKey* {.byref, exportc: prefix_ffi & "pubkey".} = object
     ## A BLS12_381 public key for BLS signature schemes with public keys on G1 and signatures on G2
     raw: ECP_ShortW_Aff[Fp[BLS12_381], G1]
 
-  Signature* {.byref.} = object
+  Signature* {.byref, exportc: prefix_ffi & "signature".} = object
     ## A BLS12_381 signature for BLS signature schemes with public keys on G1 and signatures on G2
     raw: ECP_ShortW_Aff[Fp2[BLS12_381], G2]
 
@@ -91,18 +122,26 @@ type
 # Comparisons
 # ------------------------------------------------------------------------------------------------
 
-func isZero*(elem: PublicKey or Signature): bool =
+func pubkey_is_zero*(pubkey: PublicKey): bool {.libPrefix: prefix_ffi.} =
+  ## Returns true if input is 0
+  bool(pubkey.raw.isInf())
+
+func signature_is_zero*(sig: Signature): bool {.libPrefix: prefix_ffi.} =
   ## Returns true if input is 0
-  bool(elem.raw.isInf())
+  bool(sig.raw.isInf())
 
-func `==`*(a, b: PublicKey or Signature): bool =
+func pubkeys_are_equal*(a, b: PublicKey): bool {.libPrefix: prefix_ffi.} =
+  ## Returns true if inputs are equal
+  bool(a.raw == b.raw)
+
+func signatures_are_equal*(a, b: Signature): bool {.libPrefix: prefix_ffi.} =
   ## Returns true if inputs are equal
   bool(a.raw == b.raw)
 
 # Input validation
 # ------------------------------------------------------------------------------------------------
 
-func validate_seckey*(secret_key: SecretKey): CttBLSStatus =
+func validate_seckey*(secret_key: SecretKey): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Validate the secret key.
   ## Regarding timing attacks, this will leak timing information only if the key is invalid.
   ## Namely, the secret key is 0 or the secret key is too large.
@@ -112,7 +151,7 @@ func validate_seckey*(secret_key: SecretKey): CttBLSStatus =
     return cttBLS_SecretKeyLargerThanCurveOrder
   return cttBLS_Success
 
-func validate_pubkey*(public_key: PublicKey): CttBLSStatus =
+func validate_pubkey*(public_key: PublicKey): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Validate the public key.
   ## This is an expensive operation that can be cached
   if public_key.raw.isInf().bool():
@@ -122,7 +161,7 @@ func validate_pubkey*(public_key: PublicKey): CttBLSStatus =
   if not public_key.raw.isInSubgroup().bool():
     return cttBLS_PointNotInSubgroup
 
-func validate_sig*(signature: Signature): CttBLSStatus =
+func validate_signature*(signature: Signature): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Validate the signature.
   ## This is an expensive operation that can be cached
   if signature.raw.isInf().bool():
@@ -153,17 +192,17 @@ func validate_sig*(signature: Signature): CttBLSStatus =
 ##     The third-most significant bit is set if (and only if) this point is in compressed form
 ##     and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate.
 ##
-## - https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04#appendix-A
+## - https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-05#appendix-A
 ## - https://docs.rs/bls12_381/latest/bls12_381/notes/serialization/index.html
 ##   - https://github.com/zkcrypto/bls12_381/blob/0.6.0/src/notes/serialization.rs
 
-func serialize_secret_key*(dst: var array[32, byte], secret_key: SecretKey): CttBLSStatus =
+func serialize_seckey*(dst: var array[32, byte], secret_key: SecretKey): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Serialize a secret key
   ## Returns cttBLS_Success if successful
   dst.marshal(secret_key.raw, bigEndian)
   return cttBLS_Success
 
-func serialize_public_key_compressed*(dst: var array[48, byte], public_key: PublicKey): CttBLSStatus =
+func serialize_pubkey_compressed*(dst: var array[48, byte], public_key: PublicKey): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Serialize a public key in compressed (Zcash) format
   ##
   ## Returns cttBLS_Success if successful
@@ -184,7 +223,7 @@ func serialize_public_key_compressed*(dst: var array[48, byte], public_key: Publ
 
   return cttBLS_Success
 
-func serialize_signature_compressed*(dst: var array[96, byte], signature: Signature): CttBLSStatus =
+func serialize_signature_compressed*(dst: var array[96, byte], signature: Signature): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Serialize a signature in compressed (Zcash) format
   ##
   ## Returns cttBLS_Success if successful
@@ -206,8 +245,9 @@ func serialize_signature_compressed*(dst: var array[96, byte], signature: Signat
 
   return cttBLS_Success
 
-func deserialize_secret_key*(dst: var SecretKey, src: array[32, byte]): CttBLSStatus =
-  ## deserialize a secret key
+func deserialize_seckey*(dst: var SecretKey, src: array[32, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
+  ## Deserialize a secret key
+  ## This also validates the secret key.
   ##
   ## This is protected against side-channel unless your key is invalid.
   ## In that case it will like whether it's all zeros or larger than the curve order.
@@ -218,7 +258,7 @@ func deserialize_secret_key*(dst: var SecretKey, src: array[32, byte]): CttBLSSt
     return status
   return cttBLS_Success
 
-func deserialize_public_key_compressed_unchecked*(dst: var PublicKey, src: array[48, byte]): CttBLSStatus =
+func deserialize_pubkey_compressed_unchecked*(dst: var PublicKey, src: array[48, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Deserialize a public_key in compressed (Zcash) format.
   ##
   ## Warning ⚠:
@@ -260,19 +300,20 @@ func deserialize_public_key_compressed_unchecked*(dst: var PublicKey, src: array
   let srcIsLargest = SecretBool((src[0] shr 5) and byte 1)
   dst.raw.y.cneg(isLexicographicallyLargest xor srcIsLargest)
 
-func deserialize_public_key_compressed*(dst: var PublicKey, src: array[48, byte]): CttBLSStatus =
+func deserialize_pubkey_compressed*(dst: var PublicKey, src: array[48, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Deserialize a public_key in compressed (Zcash) format
+  ## This also validates the public key.
   ##
   ## Returns cttBLS_Success if successful
 
-  result = deserialize_public_key_compressed_unchecked(dst, src)
+  result = deserialize_pubkey_compressed_unchecked(dst, src)
   if result != cttBLS_Success:
     return result
 
-  if not(bool dst.raw.isInSubgroup):
+  if not(bool dst.raw.isInSubgroup()):
     return cttBLS_PointNotInSubgroup
 
-func deserialize_signature_compressed_unchecked*(dst: var Signature, src: array[96, byte]): CttBLSStatus =
+func deserialize_signature_compressed_unchecked*(dst: var Signature, src: array[96, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Deserialize a signature in compressed (Zcash) format.
   ##
   ## Warning ⚠:
@@ -325,7 +366,7 @@ func deserialize_signature_compressed_unchecked*(dst: var Signature, src: array[
   let srcIsLargest = SecretBool((src[0] shr 5) and byte 1)
   dst.raw.y.cneg(isLexicographicallyLargest xor srcIsLargest)
 
-func deserialize_signature_compressed*(dst: var Signature, src: array[96, byte]): CttBLSStatus =
+func deserialize_signature_compressed*(dst: var Signature, src: array[96, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Deserialize a public_key in compressed (Zcash) format
   ##
   ## Returns cttBLS_Success if successful
@@ -334,13 +375,13 @@ func deserialize_signature_compressed*(dst: var Signature, src: array[96, byte])
   if result != cttBLS_Success:
     return result
 
-  if not(bool dst.raw.isInSubgroup):
+  if not(bool dst.raw.isInSubgroup()):
     return cttBLS_PointNotInSubgroup
 
-# Signatures
+# BLS Signatures
 # ------------------------------------------------------------------------------------------------
 
-func derive_public_key*(public_key: var PublicKey, secret_key: SecretKey): CttBLSStatus =
+func derive_pubkey*(public_key: var PublicKey, secret_key: SecretKey): CttBLSStatus {.libPrefix: prefix_ffi.} =
   ## Derive the public key matching with a secret key
   ##
   ## Secret protection:
@@ -356,7 +397,7 @@ func derive_public_key*(public_key: var PublicKey, secret_key: SecretKey): CttBL
     return cttBLS_InvalidEncoding
   return cttBLS_Success
 
-func sign*[T: byte|char](signature: var Signature, secret_key: SecretKey, message: openArray[T]): CttBLSStatus =
+func sign*(signature: var Signature, secret_key: SecretKey, message: openArray[byte]): CttBLSStatus {.libPrefix: prefix_ffi, genCharAPI.} =
   ## Produce a signature for the message under the specified secret key
   ## Signature is on BLS12-381 G2 (and public key on G1)
   ##
@@ -382,7 +423,7 @@ func sign*[T: byte|char](signature: var Signature, secret_key: SecretKey, messag
   coreSign(signature.raw, secretKey.raw, message, sha256, 128, augmentation = "", DST)
   return cttBLS_Success
 
-func verify*[T: byte|char](public_key: PublicKey, message: openarray[T], signature: Signature): CttBLSStatus =
+func verify*(public_key: PublicKey, message: openArray[byte], signature: Signature): CttBLSStatus {.libPrefix: prefix_ffi, genCharAPI.} =
   ## Check that a signature is valid for a message
   ## under the provided public key.
   ## returns `true` if the signature is valid, `false` otherwise.
@@ -394,9 +435,13 @@ func verify*[T: byte|char](public_key: PublicKey, message: openarray[T], signatu
   ##   Or validated via validate_pubkey
   ## - A message
   ## - A signature initialized by one of the key derivation or deserialization procedure.
-  ##   Or validated via validate_pubkey
+  ##   Or validated via validate_signature
   ##
-  ## In particular, the public key and signature are assumed to be on curve subgroup checked.
+  ## Output:
+  ## - a status code with verification success if signature is valid
+  ##   or indicating verification failure
+  ##
+  ## In particular, the public key and signature are assumed to be on curve and subgroup-checked.
 
   # Deal with cases were pubkey or signature were mistakenly zero-init, due to a generic aggregation tentative for example
   if bool(public_key.raw.isInf() or signature.raw.isInf()):
@@ -411,25 +456,29 @@ template unwrap[T: PublicKey|Signature](elems: openArray[T]): auto =
   # Unwrap collection of high-level type into collection of low-level type
   toOpenArray(cast[ptr UncheckedArray[typeof elems[0].raw]](elems[0].raw.unsafeAddr), elems.low, elems.high)
 
-func aggregate_pubkeys*(aggregate_pubkey: var PublicKey, pubkeys: openArray[PublicKey]) =
+func aggregate_pubkeys_unstable_api*(aggregate_pubkey: var PublicKey, pubkeys: openArray[PublicKey]) =
   ## Aggregate public keys into one
   ## The individual public keys are assumed to be validated, either during deserialization
   ## or by validate_pubkeys
+  #
+  # TODO: Return a bool or status code or nothing?
   if pubkeys.len == 0:
     aggregate_pubkey.raw.setInf()
     return
   aggregate_pubkey.raw.aggregate(pubkeys.unwrap())
 
-func aggregate_signatures*(aggregate_sig: var Signature, signatures: openArray[Signature]) =
+func aggregate_signatures_unstable_api*(aggregate_sig: var Signature, signatures: openArray[Signature]) =
   ## Aggregate signatures into one
   ## The individual signatures are assumed to be validated, either during deserialization
   ## or by validate_signature
+  #
+  # TODO: Return a bool or status code or nothing?
   if signatures.len == 0:
     aggregate_sig.raw.setInf()
     return
   aggregate_sig.raw.aggregate(signatures.unwrap())
 
-func fast_aggregate_verify*[T: byte|char](pubkeys: openArray[PublicKey], message: openarray[T], aggregate_sig: Signature): CttBLSStatus =
+func fast_aggregate_verify*(pubkeys: openArray[PublicKey], message: openArray[byte], aggregate_sig: Signature): CttBLSStatus {.libPrefix: prefix_ffi, genCharAPI.} =
   ## Check that a signature is valid for a message
   ## under the aggregate of provided public keys.
   ## returns `true` if the signature is valid, `false` otherwise.
@@ -441,7 +490,7 @@ func fast_aggregate_verify*[T: byte|char](pubkeys: openArray[PublicKey], message
   ##   Or validated via validate_pubkey
   ## - A message
   ## - A signature initialized by one of the key derivation or deserialization procedure.
-  ##   Or validated via validate_sig
+  ##   Or validated via validate_signature
   ##
   ## In particular, the public keys and signature are assumed to be on curve subgroup checked.
 
@@ -465,7 +514,52 @@ func fast_aggregate_verify*[T: byte|char](pubkeys: openArray[PublicKey], message
     return cttBLS_Success
   return cttBLS_VerificationFailure
 
-func aggregate_verify*[M](pubkeys: openArray[PublicKey], messages: openarray[M], aggregate_sig: Signature): CttBLSStatus =
+# C FFI: (pointer, length) variant of aggregate_verify
+func aggregate_verify*(pubkeys: ptr UncheckedArray[PublicKey],
+                       messages: ptr UncheckedArray[View[byte]],
+                       len: int,
+                       aggregate_sig: Signature): CttBLSStatus {.libPrefix: prefix_ffi.} =
+  ## Verify the aggregated signature of multiple (pubkey, message) pairs.
+  ## Returns cttBLS_Success if valid; otherwise cttBLS_VerificationFailure, cttBLS_ZeroLengthAggregation or cttBLS_PointAtInfinity.
+  ##
+  ## For message domain separation purpose, the tag is `BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_`
+  ##
+  ## Input:
+  ## - Public keys initialized by one of the key derivation or deserialization procedure.
+  ##   Or validated via validate_pubkey
+  ## - Messages; `len` is the number of entries read from both `pubkeys` and `messages`
+  ## - A signature initialized by one of the key derivation or deserialization procedure.
+  ##   Or validated via validate_signature
+  ##
+  ## In particular, the public keys and signature are assumed to be on curve and subgroup-checked.
+  ##
+  ## To avoid splitting zeros and rogue keys attack:
+  ## 1. Public keys signing the same message MUST be aggregated and checked for 0 before calling this function.
+  ## 2. Augmentation or Proof of possessions must be used for each public key.
+
+  if len == 0:
+    # IETF spec precondition: aggregating nothing is rejected
+    return cttBLS_ZeroLengthAggregation
+
+  # Deal with cases where pubkey or signature were mistakenly zero-init, due to a generic aggregation attempt for example
+  if aggregate_sig.raw.isInf().bool:
+    return cttBLS_PointAtInfinity
+
+  for i in 0 ..< len:
+    if pubkeys[i].raw.isInf().bool:
+      return cttBLS_PointAtInfinity
+
+  let verified = aggregateVerify(
+    pubkeys.toOpenArray(len).unwrap(),
+    messages.toOpenArray(len),
+    aggregate_sig.raw,
+    sha256, 128, DST)
+  if verified:
+    return cttBLS_Success
+  return cttBLS_VerificationFailure
+
+# Nim
+func aggregate_verify*[Msg](pubkeys: openArray[PublicKey], messages: openArray[Msg], aggregate_sig: Signature): CttBLSStatus =
   ## Verify the aggregated signature of multiple (pubkey, message) pairs
   ## returns `true` if the signature is valid, `false` otherwise.
   ##
@@ -476,12 +570,12 @@ func aggregate_verify*[M](pubkeys: openArray[PublicKey], messages: openarray[M],
   ##   Or validated via validate_pubkey
   ## - Messages
   ## - a signature initialized by one of the key derivation or deserialization procedure.
-  ##   Or validated via validate_sig
+  ##   Or validated via validate_signature
   ##
   ## In particular, the public keys and signature are assumed to be on curve subgroup checked.
   ##
   ## To avoid splitting zeros and rogue keys attack:
-  ## 1. Public keys signing the same message MUST be aggregated and checked for 0 before calling BLSAggregateSigAccumulator.update()
+  ## 1. Public keys signing the same message MUST be aggregated and checked for 0 before calling this function.
   ## 2. Augmentation or Proof of possessions must used for each public keys.
 
   if pubkeys.len == 0:
@@ -507,7 +601,59 @@ func aggregate_verify*[M](pubkeys: openArray[PublicKey], messages: openarray[M],
     return cttBLS_Success
   return cttBLS_VerificationFailure
 
-func batch_verify*[M](pubkeys: openArray[PublicKey], messages: openarray[M], signatures: openArray[Signature], secureRandomBytes: array[32, byte]): CttBLSStatus =
+# C FFI
+func batch_verify*(pubkeys: ptr UncheckedArray[PublicKey],
+                   messages: ptr UncheckedArray[View[byte]],
+                   signatures: ptr UncheckedArray[Signature],
+                   len: int,
+                   secureRandomBytes: array[32, byte]): CttBLSStatus {.libPrefix: prefix_ffi.} =
+  ## Verify that all (pubkey, message, signature) triplets are valid.
+  ## `pubkeys`, `messages` and `signatures` must each point to `len` elements.
+  ##
+  ## Returns:
+  ## - cttBLS_Success if every signature is valid, cttBLS_VerificationFailure if at least one is invalid
+  ## - cttBLS_ZeroLengthAggregation if `len` is 0
+  ## - cttBLS_PointAtInfinity if any public key or signature is the point at infinity
+  ##
+  ## For message domain separation purpose, the tag is `BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_`
+  ##
+  ## Input:
+  ## - Public keys initialized by one of the key derivation or deserialization procedure.
+  ##   Or validated via validate_pubkey
+  ## - Messages
+  ## - Signatures initialized by one of the key derivation or deserialization procedure.
+  ##   Or validated via validate_signature
+  ##
+  ## In particular, the public keys and signatures are assumed to be on curve and subgroup-checked.
+  ##
+  ## To avoid splitting zeros and rogue keys attacks:
+  ## 1. Cryptographically-secure random bytes must be provided.
+  ## 2. Augmentation or Proof of possessions must be used for each public key.
+  ##
+  ## The secureRandomBytes will serve as input not under the attacker control to foil potential splitting zeros inputs.
+  ## The scheme assumes that the attacker cannot resubmit 2^64 times
+  ## forged (publickey, message, signature) triplets against the same `secureRandomBytes`
+
+  if len == 0:
+    # IETF spec precondition
+    return cttBLS_ZeroLengthAggregation
+
+  # Deal with cases where a pubkey or signature was mistakenly zero-init, due to a generic aggregation attempt for example
+  for i in 0 ..< len:
+    if pubkeys[i].raw.isInf().bool or signatures[i].raw.isInf().bool:
+      return cttBLS_PointAtInfinity
+
+  let verified = batchVerify(
+    pubkeys.toOpenArray(len).unwrap(),
+    messages.toOpenArray(len),
+    signatures.toOpenArray(len).unwrap(),
+    sha256, 128, DST, secureRandomBytes)
+  if verified:
+    return cttBLS_Success
+  return cttBLS_VerificationFailure
+
+# Nim
+func batch_verify*[Msg](pubkeys: openArray[PublicKey], messages: openarray[Msg], signatures: openArray[Signature], secureRandomBytes: array[32, byte]): CttBLSStatus =
   ## Verify that all (pubkey, message, signature) triplets are valid
   ## returns `true` if all signatures are valid, `false` if at least one is invalid.
   ##
@@ -518,7 +664,7 @@ func batch_verify*[M](pubkeys: openArray[PublicKey], messages: openarray[M], sig
   ##   Or validated via validate_pubkey
   ## - Messages
   ## - Signatures initialized by one of the key derivation or deserialization procedure.
-  ##   Or validated via validate_sig
+  ##   Or validated via validate_signature
   ##
   ## In particular, the public keys and signature are assumed to be on curve subgroup checked.
   ##
diff --git a/constantine/ethereum_eip2333_bls12381_key_derivation.nim b/constantine/ethereum_eip2333_bls12381_key_derivation.nim
index 32105b042..7c905c86b 100644
--- a/constantine/ethereum_eip2333_bls12381_key_derivation.nim
+++ b/constantine/ethereum_eip2333_bls12381_key_derivation.nim
@@ -12,18 +12,18 @@ import
   ./math/config/[curves, type_ff],
   ./math/arithmetic/[bigints, limbs_montgomery],
   ./math/io/io_bigints,
-  ./platforms/endians
+  ./platforms/[primitives, endians]
 
 # EIP2333: BLS12-381 Key Generation
 # ------------------------------------------------------------
 #
 # https://eips.ethereum.org/EIPS/eip-2333
 
-{.push raises: [].} # No exceptions
+{.push raises: [], checks: off.} # No exceptions
 
 type SecretKey = matchingOrderBigInt(BLS12_381)
 
-func hkdf_mod_r[T: char|byte](secretKey: var SecretKey, ikm: openArray[byte], key_info: openArray[T]) =
+func hkdf_mod_r(secretKey: var SecretKey, ikm: openArray[byte], key_info: openArray[byte]) =
   ## Ethereum 2 EIP-2333, extracts this from the BLS signature schemes
   # 1. salt = "BLS-SIG-KEYGEN-SALT-"
   # 2. SK = 0
@@ -52,7 +52,7 @@ func hkdf_mod_r[T: char|byte](secretKey: var SecretKey, ikm: openArray[byte], ke
     const L = 48
     var okm{.noInit.}: array[L, byte]
     const L_octetstring = L.uint16.toBytesBE()
-    ctx.hkdfExpand(okm, prk, key_info, append = L_octetstring)
+    ctx.hkdfExpand(okm, prk, key_info, append = L_octetstring, clearMem = true)
     #  7. x = OS2IP(OKM) mod r
     #  We reduce mod r via Montgomery reduction, instead of bigint division
     #  as constant-time division works bits by bits (384 bits) while
@@ -64,10 +64,10 @@ func hkdf_mod_r[T: char|byte](secretKey: var SecretKey, ikm: openArray[byte], ke
     seckeyDbl.unmarshal(okm, bigEndian)
     # secretKey.reduce(seckeyDbl, BLS12_381.getCurveOrder())
     secretKey.limbs.redc2xMont(seckeyDbl.limbs,                                      # seckey/R
-                               BLS12_381.getCurveOrder().limbs, Fr[BLS12_381].getNegInvModWord(), 
+                               BLS12_381.getCurveOrder().limbs, Fr[BLS12_381].getNegInvModWord(),
                                Fr[BLS12_381].getSpareBits())
     secretKey.limbs.mulMont(secretKey.limbs, Fr[BLS12_381].getR2modP().limbs,        # (seckey/R) * R² * R⁻¹ = seckey
-                            BLS12_381.getCurveOrder().limbs, Fr[BLS12_381].getNegInvModWord(), 
+                            BLS12_381.getCurveOrder().limbs, Fr[BLS12_381].getNegInvModWord(),
                             Fr[BLS12_381].getSpareBits())
 
     if bool secretKey.isZero():
@@ -90,19 +90,20 @@ iterator ikm_to_lamport_SK(
 
   # 1. OKM = HKDF-Expand(PRK, "" , L)
   #    with L = K * 255 and K = 32 (sha256 output)
-  {.push checks: off.} # No OverflowError or IndexError allowed
   for i in ctx.hkdfExpandChunk(
             lamportSecretKeyChunk,
-            prk, "",""):
+            prk, default(array[0, byte]), default(array[0, byte])):
     yield i
 
+  ctx.clear()
+
 func parent_SK_to_lamport_PK(
        lamportPublicKey: var array[32, byte],
        parentSecretKey: SecretKey,
        index: uint32) =
   ## Derives the index'th child's lamport PublicKey
   ## from the parent SecretKey
-  
+
   # 0. salt = I2OSP(index, 4)
   let salt{.noInit.} = index.toBytesBE()
 
@@ -119,8 +120,6 @@ func parent_SK_to_lamport_PK(
 
   var tmp{.noInit.}, chunk{.noInit.}: array[32, byte]
 
-  {.push checks: off.} # No OverflowError or IndexError allowed
-
   # 2. lamport_0 = IKM_to_lamport_SK(IKM, salt)
   # 6. for i = 1, .., 255 (inclusive)
   #        lamport_PK = lamport_PK | SHA256(lamport_0[i])
@@ -130,7 +129,7 @@ func parent_SK_to_lamport_PK(
     if i == 254:
       # We iterate from 0
       break
-  
+
   # 3. not_IKM = flip_bits(parent_SK)
   for i in 0 ..< 32:
     ikm[i] = not ikm[i]
@@ -152,26 +151,26 @@ func parent_SK_to_lamport_PK(
 func derive_child_secretKey*(
         childSecretKey: var SecretKey,
         parentSecretKey: SecretKey,
-        index: uint32
-     ): bool =
+        index: uint32): bool =
   ## EIP2333 Child Key derivation function
   var compressed_lamport_PK{.noInit.}: array[32, byte]
   # 0. compressed_lamport_PK = parent_SK_to_lamport_PK(parent_SK, index)
   parent_SK_to_lamport_PK(
     compressed_lamport_PK,
     parentSecretKey,
-    index,
-  )
-  childSecretKey.hkdf_mod_r(compressed_lamport_PK, key_info = "")
+    index)
+  childSecretKey.hkdf_mod_r(compressed_lamport_PK, key_info = default(array[0, byte]))
+  compressed_lamport_PK.setZero()
   return true
 
 func derive_master_secretKey*(
         masterSecretKey: var SecretKey,
-        ikm: openArray[byte]
-     ): bool =
+        ikm: openArray[byte]): bool =
   ## EIP2333 Master key derivation
+  ## The input keying material SHOULD be cleared after use
+  ## to prevent leakage.
   if ikm.len < 32:
     return false
 
-  masterSecretKey.hkdf_mod_r(ikm, key_info = "")
+  masterSecretKey.hkdf_mod_r(ikm, key_info = default(array[0, byte]))
   return true
\ No newline at end of file
diff --git a/constantine/ethereum_evm_precompiles.nim b/constantine/ethereum_evm_precompiles.nim
index b1254f054..2af69171a 100644
--- a/constantine/ethereum_evm_precompiles.nim
+++ b/constantine/ethereum_evm_precompiles.nim
@@ -113,7 +113,7 @@ func eth_evm_ecadd*(r: var array[64, byte], inputs: openarray[byte]): CttEVMStat
 
   # Auto-pad with zero
   var padded: array[128, byte]
-  padded.copy(0, inputs, 0, min(inputs.len, 128))
+  padded.rawCopy(0, inputs, 0, min(inputs.len, 128))
 
   var P{.noInit.}, Q{.noInit.}, R{.noInit.}: ECP_ShortW_Prj[Fp[BN254_Snarks], G1]
 
@@ -168,7 +168,7 @@ func eth_evm_ecmul*(r: var array[64, byte], inputs: openarray[byte]): CttEVMStat
 
   # Auto-pad with zero
   var padded: array[128, byte]
-  padded.copy(0, inputs, 0, min(inputs.len, 128))
+  padded.rawCopy(0, inputs, 0, min(inputs.len, 128))
 
   var P{.noInit.}: ECP_ShortW_Prj[Fp[BN254_Snarks], G1]
 
diff --git a/constantine/hash_to_curve/h2c_hash_to_field.nim b/constantine/hash_to_curve/h2c_hash_to_field.nim
index b62b7ea95..5d30d124e 100644
--- a/constantine/hash_to_curve/h2c_hash_to_field.nim
+++ b/constantine/hash_to_curve/h2c_hash_to_field.nim
@@ -8,7 +8,7 @@
 
 import
   # Internals
-  ../platforms/[abstractions, endians],
+  ../platforms/[abstractions, endians, views],
   ../hashes,
   ../math/io/[io_bigints, io_fields],
   ../math/config/curves,
@@ -37,10 +37,10 @@ template strxor(b_i: var array, b0: array): untyped =
     b_i[i] = b_i[i] xor b0[i]
 # ----------------------------------------------------------------
 
-func shortDomainSepTag*[DigestSize: static int, B: byte|char](
+func shortDomainSepTag*[DigestSize: static int](
        H: type CryptoHash,
        output: var array[DigestSize, byte],
-       oversizedDST: openarray[B]) =
+       oversizedDST: openArray[byte]) {.genCharAPI.} =
   ## Compute a short Domain Separation Tag
   ## from a domain separation tag larger than 255 bytes
   ##
@@ -52,13 +52,13 @@ func shortDomainSepTag*[DigestSize: static int, B: byte|char](
   ctx.update oversizedDST
   ctx.finish(output)
 
-func expandMessageXMD*[B1, B2, B3: byte|char, len_in_bytes: static int](
+func expandMessageXMD*[len_in_bytes: static int](
        H: type CryptoHash,
        output: var array[len_in_bytes, byte],
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]
+     ) {.genCharAPI.} =
   ## The expand_message_xmd function produces a uniformly random byte
   ## string using a cryptographic hash function H that outputs "b" bits,
   ## with b >= 2*k and k the target security level (for example 128-bit)
@@ -77,7 +77,7 @@ func expandMessageXMD*[B1, B2, B3: byte|char, len_in_bytes: static int](
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
@@ -163,14 +163,14 @@ func mulMont(r: var BigInt, a, b: BigInt, FF: type) {.inline.} =
     FF.getSpareBits()
   )
 
-func hashToField*[Field; B1, B2, B3: byte|char, count: static int](
+func hashToField*[Field; count: static int](
        H: type CryptoHash,
        k: static int,
        output: var array[count, Field],
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]
+     ) {.genCharAPI.} =
   ## Hash to a field or an extension field
   ## https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-hash-to-curve-11#section-5.3
   ##
@@ -186,7 +186,7 @@ func hashToField*[Field; B1, B2, B3: byte|char, count: static int](
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
diff --git a/constantine/hash_to_curve/hash_to_curve.nim b/constantine/hash_to_curve/hash_to_curve.nim
index ef45b4900..60998a259 100644
--- a/constantine/hash_to_curve/hash_to_curve.nim
+++ b/constantine/hash_to_curve/hash_to_curve.nim
@@ -8,7 +8,7 @@
 
 import
   # Internals
-  ../platforms/abstractions,
+  ../platforms/[abstractions, views],
   ../math/config/curves,
   ../math/[arithmetic, extension_fields],
   ../math/constants/[zoo_hash_to_curve, zoo_subgroups],
@@ -43,7 +43,7 @@ func mapToCurve_svdw[F, G](
   ## Deterministically map a field element u
   ## to an elliptic curve point `r`
   ## https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-hash-to-curve-14#section-6.6.1
-  
+
   var
     tv1 {.noInit.}, tv2{.noInit.}, tv3{.noInit.}: F
     tv4{.noInit.}: F
@@ -62,7 +62,7 @@ func mapToCurve_svdw[F, G](
     tv1.c1.neg()
   tv3.prod(tv1, tv2)
   tv3.inv()
-  
+
   tv4.prod(u, tv1)
   tv4 *= tv3
   tv4.mulCheckSparse(h2cConst(F.C, svdw, G, z3))
@@ -87,7 +87,7 @@ func mapToCurve_svdw[F, G](
 
   r.y.curve_eq_rhs(r.x, G)
   r.y.sqrt()
-  
+
   r.y.cneg(sgn0(u) xor sgn0(r.y))
 
 func mapToIsoCurve_sswuG1_opt3mod4[F](
@@ -101,8 +101,7 @@ func mapToIsoCurve_sswuG1_opt3mod4[F](
   mapToIsoCurve_sswuG1_opt3mod4(
     xn, xd,
     yn,
-    u, xd3
-  )
+    u, xd3)
 
   # Convert to Jacobian
   r.z = xd          # Z = xd
@@ -120,8 +119,7 @@ func mapToIsoCurve_sswuG2_opt9mod16[F](
   mapToIsoCurve_sswuG2_opt9mod16(
     xn, xd,
     yn,
-    u, xd3
-  )
+    u, xd3)
 
   # Convert to Jacobian
   r.z = xd          # Z = xd
@@ -167,7 +165,7 @@ func mapToCurve_sswu_fusedAdd[F; G: static Subgroup](
     # Simplified Shallue-van de Woestijne-Ulas method for AB == 0
 
     var P0{.noInit.}, P1{.noInit.}: ECP_ShortW_Jac[F, G]
-    
+
     # 1. Map to E' isogenous to E
     when F is Fp and F.C.has_P_3mod4_primeModulus():
       # 1. Map to E'1 isogenous to E1
@@ -191,16 +189,13 @@ func mapToCurve_sswu_fusedAdd[F; G: static Subgroup](
 # Hash to curve
 # ----------------------------------------------------------------
 
-func hashToCurve_svdw*[
-         F; G: static Subgroup;
-         B1, B2, B3: byte|char](
+func hashToCurve_svdw*[F; G: static Subgroup](
        H: type CryptoHash,
        k: static int,
        output: var ECP_ShortW_Jac[F, G],
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]) {.genCharAPI.} =
   ## Hash a message to an elliptic curve
   ##
   ## Arguments:
@@ -215,14 +210,14 @@ func hashToCurve_svdw*[
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
   ## - `domainSepTag` is the protocol domain separation tag (DST).
 
   var u{.noInit.}: array[2, F]
-  if domainSepTag.len <= 255: 
+  if domainSepTag.len <= 255:
     H.hashToField(k, u, augmentation, message, domainSepTag)
   else:
     const N = H.type.digestSize()
@@ -233,16 +228,13 @@ func hashToCurve_svdw*[
   output.mapToCurve_svdw_fusedAdd(u[0], u[1])
   output.clearCofactor()
 
-func hashToCurve_sswu*[
-         F; G: static Subgroup;
-         B1, B2, B3: byte|char](
+func hashToCurve_sswu*[F; G: static Subgroup](
        H: type CryptoHash,
        k: static int,
        output: var ECP_ShortW_Jac[F, G],
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]) {.genCharAPI.} =
   ## Hash a message to an elliptic curve
   ##
   ## Arguments:
@@ -257,14 +249,14 @@ func hashToCurve_sswu*[
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
   ## - `domainSepTag` is the protocol domain separation tag (DST).
 
   var u{.noInit.}: array[2, F]
-  if domainSepTag.len <= 255: 
+  if domainSepTag.len <= 255:
     H.hashToField(k, u, augmentation, message, domainSepTag)
   else:
     const N = H.type.digestSize()
@@ -275,16 +267,13 @@ func hashToCurve_sswu*[
   output.mapToCurve_sswu_fusedAdd(u[0], u[1])
   output.clearCofactor()
 
-func hashToCurve*[
-         F; G: static Subgroup;
-         B1, B2, B3: byte|char](
+func hashToCurve*[F; G: static Subgroup](
        H: type CryptoHash,
        k: static int,
        output: var ECP_ShortW_Jac[F, G],
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) {.inline.} =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]) {.inline, genCharAPI.} =
   ## Hash a message to an elliptic curve
   ##
   ## Arguments:
@@ -299,7 +288,7 @@ func hashToCurve*[
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
@@ -313,16 +302,13 @@ func hashToCurve*[
   else:
     {.error: "Not implemented".}
 
-func hashToCurve*[
-         F; G: static Subgroup;
-         B1, B2, B3: byte|char](
+func hashToCurve*[F; G: static Subgroup](
        H: type CryptoHash,
        k: static int,
        output: var (ECP_ShortW_Prj[F, G] or ECP_ShortW_Aff[F, G]),
-       augmentation: openarray[B1],
-       message: openarray[B2],
-       domainSepTag: openarray[B3]
-     ) {.inline.} =
+       augmentation: openArray[byte],
+       message: openArray[byte],
+       domainSepTag: openArray[byte]) {.inline, genCharAPI.} =
   ## Hash a message to an elliptic curve
   ##
   ## Arguments:
@@ -337,12 +323,12 @@ func hashToCurve*[
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
   ## - `domainSepTag` is the protocol domain separation tag (DST).
-  
+
   var Pjac{.noInit.}: ECP_ShortW_Jac[F, G]
   H.hashToCurve(k, Pjac, augmentation, message, domainSepTag)
   when output is ECP_ShortW_Prj:
diff --git a/constantine/hashes.nim b/constantine/hashes.nim
index 1dec94a64..5569fbb04 100644
--- a/constantine/hashes.nim
+++ b/constantine/hashes.nim
@@ -6,6 +6,8 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
+import platforms/views
+
 # ############################################################
 #
 #                Hash Function concept
@@ -30,23 +32,19 @@ type
 
     # Context
     # -------------------------------------------
-    # update/finish are not matching properly
-
-    # type B = char or byte
     ctx.init()
-    # ctx.update(openarray[B])
-    # ctx.finish(var array[H.digestSize, byte])
+    ctx.update(openarray[byte])
+    ctx.finish(var array[H.digestSize, byte])
     ctx.clear()
 
-func hash*[DigestSize: static int, T: char|byte](
+func hash*[DigestSize: static int](
        HashKind: type CryptoHash,
        digest: var array[DigestSize, byte],
-       message: openarray[T],
-       clearMem = false) =
+       message: openArray[byte],
+       clearMem = false) {.genCharAPI.} =
   ## Produce a digest from a message
   static: doAssert DigestSize == HashKind.type.digestSize
 
-  mixin update, finish
   var ctx {.noInit.}: HashKind
   ctx.init()
   ctx.update(message)
@@ -55,10 +53,10 @@ func hash*[DigestSize: static int, T: char|byte](
   if clearMem:
     ctx.clear()
 
-func hash*[T: char|byte](
+func hash*(
        HashKind: type CryptoHash,
-       message: openarray[T],
-       clearmem = false): array[HashKind.digestSize, byte] {.noInit.} =
+       message: openArray[byte],
+       clearmem = false): array[HashKind.digestSize, byte] {.noInit, genCharAPI.} =
   ## Produce a digest from a message
   HashKind.hash(result, message, clearMem)
 
diff --git a/constantine/hashes/h_sha256.nim b/constantine/hashes/h_sha256.nim
index 38b5b0e88..ba5a1dd85 100644
--- a/constantine/hashes/h_sha256.nim
+++ b/constantine/hashes/h_sha256.nim
@@ -6,8 +6,10 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
+import ../zoo_exports
+
 import
-  ../platforms/[abstractions, endians],
+  ../platforms/[abstractions, endians, views],
   ./sha256/sha256_generic
 
 when UseASM_X86_32:
@@ -82,7 +84,7 @@ template internalBlockSize*(H: type sha256): int =
   ## Returns the byte size of the hash function ingested blocks
   BlockSize
 
-func init*(ctx: var Sha256Context) =
+func init*(ctx: var Sha256Context) {.libPrefix: prefix_sha256.} =
   ## Initialize or reinitialize a Sha256 context
 
   ctx.msgLen = 0
@@ -119,7 +121,7 @@ func initZeroPadded*(ctx: var Sha256Context) =
   ctx.s.H[6] = 0xbafef9ea'u32
   ctx.s.H[7] = 0x1837a9d8'u32
 
-func update*(ctx: var Sha256Context, message: openarray[byte]) =
+func update*(ctx: var Sha256Context, message: openarray[byte]) {.libPrefix: prefix_sha256, genCharAPI.} =
   ## Append a message to a SHA256 context
   ## for incremental SHA256 computation
   ##
@@ -132,7 +134,7 @@ func update*(ctx: var Sha256Context, message: openarray[byte]) =
   ##
   ## For passwords and secret keys, you MUST NOT use raw SHA-256
   ## use a Key Derivation Function instead (KDF)
-  
+
   # Message processing state machine
   var bufIdx = uint(ctx.msgLen mod BlockSize)
   var cur = 0'u
@@ -141,12 +143,12 @@ func update*(ctx: var Sha256Context, message: openarray[byte]) =
   if bufIdx != 0 and bufIdx+bytesLeft >= BlockSize:
     # Previous partial update, fill the buffer and do one sha256 hash
     let free = BlockSize - bufIdx
-    ctx.buf.copy(dStart = bufIdx, message, sStart = 0, len = free)
+    ctx.buf.rawCopy(dStart = bufIdx, message, sStart = 0, len = free)
     ctx.hashBuffer()
     bufIdx = 0
     cur = free
     bytesLeft -= free
-  
+
   if bytesLeft >= BlockSize:
     # Process n blocks (64 byte each)
     let numBlocks = bytesLeft div BlockSize
@@ -156,26 +158,11 @@ func update*(ctx: var Sha256Context, message: openarray[byte]) =
 
   if bytesLeft != 0:
     # Store the tail in buffer
-    ctx.buf.copy(dStart = bufIdx, message, sStart = cur, len = bytesLeft)
+    ctx.buf.rawCopy(dStart = bufIdx, message, sStart = cur, len = bytesLeft)
 
   ctx.msgLen += message.len.uint
 
-func update*(ctx: var Sha256Context, message: openarray[char]) {.inline.} =
-  ## Append a message to a SHA256 context
-  ## for incremental SHA256 computation
-  ##
-  ## Security note: the tail of your message might be stored
-  ## in an internal buffer.
-  ## if sensitive content is used, ensure that
-  ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
-  ## Additionally ensure that the message(s) passed were stored
-  ## in memory considered secure for your threat model.
-  ##
-  ## For passwords and secret keys, you MUST NOT use raw SHA-256
-  ## use a Key Derivation Function instead (KDF)
-  ctx.update(message.toOpenArrayByte(message.low, message.high))
-
-func finish*(ctx: var Sha256Context, digest: var array[32, byte]) =
+func finish*(ctx: var Sha256Context, digest: var array[32, byte]) {.libPrefix: prefix_sha256.} =
   ## Finalize a SHA256 computation and output the
   ## message digest to the `digest` buffer.
   ##
@@ -205,7 +192,7 @@ func finish*(ctx: var Sha256Context, digest: var array[32, byte]) =
   ctx.s.hashMessageBlocks(ctx.buf.asUnchecked(), numBlocks = 1)
   digest.dumpHash(ctx.s)
 
-func clear*(ctx: var Sha256Context) =
+func clear*(ctx: var Sha256Context) {.libPrefix: prefix_sha256.} =
   ## Clear the context internal buffers
   ## Security note:
   ## For passwords and secret keys, you MUST NOT use raw SHA-256
diff --git a/constantine/kdf/kdf_hkdf.nim b/constantine/kdf/kdf_hkdf.nim
index c7012c505..92854bcd3 100644
--- a/constantine/kdf/kdf_hkdf.nim
+++ b/constantine/kdf/kdf_hkdf.nim
@@ -9,7 +9,7 @@
 import
   ../hashes,
   ../mac/mac_hmac,
-  ../platforms/primitives
+  ../platforms/[primitives, views]
 
 # HMAC-based Extract-and-Expand Key Derivation Function (HKDF)
 # ------------------------------------------------------------
@@ -21,15 +21,18 @@ import
 type HKDF*[H: CryptoHash] = object
   hmac: HMAC[H]
 
-func hkdf_extract_init*[H: CryptoHash, S, I: char|byte](
+func clear*(ctx: var HKDF) {.inline.} =
+  ctx.hmac.clear() # HKDF only wraps an HMAC context: delegate clearing to it
+
+func hkdf_extract_init*[H: CryptoHash](
        ctx: var HKDF[H],
-       salt: openArray[S],
-       ikm: openArray[I]) {.inline.}=
+       salt: openArray[byte],
+       ikm: openArray[byte]) {.inline.}=
   ctx.hmac.init(salt)
   ctx.hmac.update(ikm)
 
-func hkdf_extract_append_to_IKM*[H: CryptoHash, T: char|byte](
-       ctx: var HKDF[H], append: openArray[T]) {.inline.} =
+func hkdf_extract_append_to_IKM*[H: CryptoHash](
+       ctx: var HKDF[H], append: openArray[byte]) {.inline.} =
   ctx.hmac.update(append)
 
 func hkdf_extract_finish*[H: CryptoHash, N: static int](
@@ -38,11 +41,11 @@ func hkdf_extract_finish*[H: CryptoHash, N: static int](
   static: doAssert H.digestSize == N
   ctx.hmac.finish(prk)
 
-func hkdfExtract*[H: CryptoHash;S,I: char|byte, N: static int](
+func hkdfExtract*[H: CryptoHash; N: static int](
                      ctx: var HKDF[H],
                      prk: var array[N, byte],
-                     salt: openArray[S],
-                     ikm: openArray[I]) {.inline.} =
+                     salt: openArray[byte],
+                     ikm: openArray[byte]) {.inline.} =
   ## "Extract" step of HKDF.
   ## Extract a fixed size pseudom-random key
   ## from an optional salt value
@@ -69,17 +72,17 @@ func hkdfExtract*[H: CryptoHash;S,I: char|byte, N: static int](
   ctx.hkdf_extract_init(salt, ikm)
   ctx.hkdf_extract_finish(prk)
 
-iterator hkdfExpandChunk*[H: CryptoHash; N: static int; I, A: char|byte](
+iterator hkdfExpandChunk*[H: CryptoHash; N: static int](
           ctx: var HKDF[H],
           chunk: var array[N, byte],
           prk: array[N, byte],
-          info: openArray[I],
-          append: openArray[A]): int =
+          info: openArray[byte],
+          append: openArray[byte]): int =
   ## "Expand" step of HKDF, with an iterator with up to 255 iterations.
-  ## 
+  ##
   ## Note: The output MUST be at most 255 iterations as per RFC5869
   ##       https://datatracker.ietf.org/doc/html/rfc5869
-  ## 
+  ##
   ## Expand a fixed size pseudo random-key
   ## into several pseudo-random keys
   ##
@@ -94,12 +97,15 @@ iterator hkdfExpandChunk*[H: CryptoHash; N: static int; I, A: char|byte](
   ## - chunk:
   ##   In:  OKMᵢ₋₁ (output keying material chunk i-1)
   ##   Out: OKMᵢ (output keying material chunk i).
-  ## 
+  ##
   ## Output:
   ## - returns the current chunk number i
-  ## 
+  ##
   ## Temporary:
   ## - ctx: a HMAC["cryptographic-hash"] context, for example HMAC[sha256].
+  ##
+  ## After iterating, the HKDF context should be cleared
+  ## if secret keying material was used.
 
   const HashLen = H.digestSize()
   static: doAssert N == HashLen
@@ -117,12 +123,13 @@ iterator hkdfExpandChunk*[H: CryptoHash; N: static int; I, A: char|byte](
 
     yield i
 
-func hkdfExpand*[H: CryptoHash; K: static int; I, A: char|byte](
+func hkdfExpand*[H: CryptoHash; K: static int](
                     ctx: var HKDF[H],
                     output: var openArray[byte],
                     prk: array[K, byte],
-                    info: openArray[I],
-                    append: openArray[A]) =
+                    info: openArray[byte],
+                    append: openArray[byte],
+                    clearMem = false) =
   ## "Expand" step of HKDF
   ## Expand a fixed size pseudo random-key
   ## into several pseudo-random keys
@@ -153,18 +160,20 @@ func hkdfExpand*[H: CryptoHash; K: static int; I, A: char|byte](
   for i in ctx.hkdfExpandChunk(t, prk, info, append):
     let iStart = i * HashLen
     let size = min(HashLen, output.len - iStart)
-    copy(output, iStart, t, 0, size)
-   
+    rawCopy(output, iStart, t, 0, size)
+
     if iStart+HashLen >= output.len:
       break
 
-  # ctx.clear() - TODO: very expensive
+  if clearMem:
+    ctx.clear()
 
-func hkdfExpand*[H: CryptoHash; K: static int; I: char|byte](
+func hkdfExpand*[H: CryptoHash; K: static int](
                     ctx: var HKDF[H],
                     output: var openArray[byte],
                     prk: array[K, byte],
-                    info: openArray[I]) {.inline.} =
+                    info: openArray[byte],
+                    clearMem = false) {.inline.} =
   ## "Expand" step of HKDF
   ## Expand a fixed size pseudo random-key
   ## into several pseudo-random keys
@@ -178,17 +187,18 @@ func hkdfExpand*[H: CryptoHash; K: static int; I: char|byte](
   ##
   ## Temporary:
   ## - ctx: a HMAC["cryptographic-hash"] context, for example HMAC[sha256].
-  hkdfExpand(ctx, output, prk, info, default(array[0, byte]))
+  hkdfExpand(ctx, output, prk, info, default(array[0, byte]), clearMem)
 
-func hkdf*[H: CryptoHash, N: static int, O, S, K, I: char|byte](
+func hkdf*[H: CryptoHash, N: static int](
        Hash: typedesc[H],
-       output: var openArray[O],
-       salt: openArray[S],
-       ikm: openArray[K],
-       info: openArray[I]) {.inline.} =
+       output: var openArray[byte],
+       salt: openArray[byte],
+       ikm: openArray[byte],
+       info: openArray[byte],
+       clearMem = false) {.inline, genCharAPI.} =
   ## HKDF
   ## Inputs:
-  ## - A hash function, with an output digest length HashLen 
+  ## - A hash function, with an output digest length HashLen
   ## - An opttional salt value (non-secret random value), if not provided,
   ##   it is set to an array of HashLen zero bytes
   ## - A secret Input Keying Material
@@ -197,4 +207,4 @@ func hkdf*[H: CryptoHash, N: static int, O, S, K, I: char|byte](
   var ctx{.noInit.}: HMAC[H]
   var prk{.noInit.}: array[H.digestSize(), byte]
   ctx.hkdfExtract(prk, salt, ikm)
-  ctx.hkdfExpand(output, prk, info)
+  ctx.hkdfExpand(output, prk, info, clearMem)
diff --git a/constantine/mac/mac_hmac.nim b/constantine/mac/mac_hmac.nim
index d7eacf66f..737ab2564 100644
--- a/constantine/mac/mac_hmac.nim
+++ b/constantine/mac/mac_hmac.nim
@@ -8,7 +8,7 @@
 
 import
   ../hashes,
-  ../platforms/primitives
+  ../platforms/[primitives, views]
 
 # HMAC: Keyed-Hashing for Message Authentication
 # ----------------------------------------------
@@ -26,19 +26,19 @@ type HMAC*[H: CryptoHash] = object
   inner: H
   outer: H
 
-func init*[H: CryptoHash, T: char|byte](ctx: var HMAC[H], secretKey: openArray[T]) =
+func init*[H: CryptoHash](ctx: var HMAC[H], secretKey: openArray[byte]) {.genCharAPI.} =
   ## Initialize a HMAC-based Message Authentication Code
   ## with a pre-shared secret key
   ## between the parties that want to authenticate messages between each other.
-  ## 
+  ##
   ## Keys should be at least the same size as the hash function output size.
-  ## 
+  ##
   ## Keys need to be chosen at random (or using a cryptographically strong
   ## pseudo-random generator seeded with a random seed), and periodically
   ## refreshed.
   var key{.noInit.}: array[H.internalBlockSize(), byte]
   if secretKey.len <= key.len:
-    copy(key, 0, secretKey, 0, secretKey.len)
+    rawCopy(key, 0, secretKey, 0, secretKey.len)
     for i in secretKey.len ..< key.len:
       key[i] = byte 0
   else:
@@ -62,15 +62,15 @@ func init*[H: CryptoHash, T: char|byte](ctx: var HMAC[H], secretKey: openArray[T
   ctx.outer.init()
   ctx.outer.update(key)
 
-func update*[H: CryptoHash, T: char|byte](ctx: var HMAC[H], message: openArray[T]) =
+func update*[H: CryptoHash](ctx: var HMAC[H], message: openArray[byte]) {.genCharAPI.} =
   ## Append a message to a HMAC authentication context.
   ## for incremental HMAC computation.
   ctx.inner.update(message)
 
-func finish*[H: CryptoHash, T: char|byte, N: static int](ctx: var HMAC[H], tag: var array[N, T]) =
+func finish*[H: CryptoHash, N: static int](ctx: var HMAC[H], tag: var array[N, byte]) =
   ## Finalize a HMAC authentication
   ## and output an authentication tag to the `tag` buffer
-  ## 
+  ##
   ## Output may be used truncated, with the leftmost bits are kept.
   ## It is recommended that the tag length is at least half the length of the hash output
   ## and at least 80-bits.
@@ -85,17 +85,18 @@ func clear*[H: CryptoHash](ctx: var HMAC[H]) =
   ctx.inner.clear()
   ctx.outer.clear()
 
-func mac*[T: char|byte, H: CryptoHash, N: static int](
+func mac*[T0, T1: char|byte, H: CryptoHash, N: static int](
        Hash: type HMAC[H],
        tag: var array[N, byte],
-       message: openArray[T],
-       secretKey: openarray[T],
+       message: openArray[T0],
+       secretKey: openArray[T1],
        clearMem = false) =
   ## Produce an authentication tag from a message
   ## and a preshared unique non-reused secret key
-  
+  # TODO: we can't use the {.genCharAPI.} macro here
+  #       due to the 2 openArray[byte] parameters and the CryptoHash concept
   static: doAssert N == H.digestSize()
-  
+
   var ctx {.noInit.}: HMAC[H]
   ctx.init(secretKey)
   ctx.update(message)
diff --git a/constantine/mac/mac_poly1305.nim b/constantine/mac/mac_poly1305.nim
index 38c2af3e4..39ba542b4 100644
--- a/constantine/mac/mac_poly1305.nim
+++ b/constantine/mac/mac_poly1305.nim
@@ -7,7 +7,7 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import
-  ../platforms/abstractions,
+  ../platforms/[abstractions, views],
   ../math/arithmetic/bigints,
   ../math/arithmetic/[limbs, limbs_extmul],
   ../math/io/io_bigints
@@ -42,7 +42,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
   ##        2ᵐ-c ≡  0     (mod p)
   ##   <=>  2ᵐ   ≡  c     (mod p)   [1]
   ##   <=> a2ᵐ+b ≡ ac + b (mod p)
-  ## 
+  ##
   ## This partially reduces the input in range [0, 2¹³⁰)
   #
   # Assuming 64-bit words,
@@ -51,25 +51,25 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
   # Assuming 32-bit words,
   #   N1 = 5 words (160-bit necessary for 2¹³⁰-1)
   #   N2 = 8 words (288-bit necessary for 2¹³¹.2¹²⁴)
-  # 
+  #
   # from 64-bit, starting from [1]
   #   2ᵐ      ≡  c     (mod p)
   #   2¹³⁰    ≡  5     (mod p)
   # 2¹³⁰.2⁶²  ≡  5.2⁶² (mod p)
   #   2¹⁹²    ≡  5.2⁶² (mod p)
-  # 
+  #
   # Hence if we call a the [2¹⁹², 2²⁶⁰) range
   # and b the [0, 2¹⁹²) range
   # we have
   # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p)
-  # 
+  #
   # Then we can handle the highest word which has
   # 62 bits that should be folded back as well
-  # 
+  #
   # Similarly for 32-bit
   #   2¹⁶⁰    ≡  5.2³⁰ (mod p)
   # and we need to fold back the top 30 bits
-  # 
+  #
   # But there is a twist. 5.2⁶² need 65-bit not 64
   # and 5.2³⁰ need 33-bit not 32
 
@@ -77,7 +77,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
     static:
       doAssert N1 == 3
       doAssert N2 == 4
-    
+
     block:
       # First pass, fold everything greater than 2¹⁹²-1
       # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p)
@@ -99,7 +99,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
     static:
       doAssert N1 == 5
       doAssert N2 == 8
-    
+
     block:
       # First pass, fold everything greater than 2¹⁶⁰-1
       # a2¹⁶⁰+b ≡ a.5.2³⁰ + b (mod p)
@@ -109,7 +109,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
 
       staticFor i, 0, N1:
         r[i] = a[i]
-      
+
       mulDoubleAcc(r[2], r[1], r[0], a[5], cExcess)
       mulDoubleAcc(r[3], r[2], r[1], a[6], cExcess)
       mulDoubleAcc(r[4], r[3], r[2], a[7], cExcess)
@@ -122,7 +122,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
   var carry, carry2: Carry
   var hi = r[N1-1] shr (WordBitWidth - excessBits)
   r[N1-1] = r[N1-1] and (MaxWord shr excessBits)
-  
+
   # hi *= 5, with overflow stored in carry
   let hi4 = hi shl 2                   # Cannot overflow as we have 2 spare bits
   addC(carry2, hi, hi, hi4, Carry(0))  # Use the carry bit for storing a 63/31 bit result
@@ -132,7 +132,7 @@ func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
   addC(carry, r[1], r[1], SecretWord(carry2), carry)
   staticFor i, 2, N1:
     addC(carry, r[i], r[i], Zero, carry)
-  
+
 func finalReduce_1305[N: static int](a: var Limbs[N]) =
   ## Maps an input in redundant representation [0, 2¹³¹-10)
   ## to the canonical representation in [0, 2¹³⁰-5)
@@ -157,10 +157,10 @@ type Poly1305_CTX = object
 
 type poly1305* = Poly1305_CTX
 
-func macMessageBlocks[T: byte|char](
+func macMessageBlocks(
        acc: var BigInt[130+1],
        r: BigInt[124],
-       message: openArray[T],
+       message: openArray[byte],
        blockSize = BlockSize): uint =
   ## Authenticate a message block by block
   ## Poly1305 block size is 16 bytes.
@@ -180,20 +180,13 @@ func macMessageBlocks[T: byte|char](
 
   for curBlock in 0 ..< numBlocks:
     # range [0, 2¹²⁸-1)
-    when T is byte:
-      input.unmarshal(
-        message.toOpenArray(curBlock*BlockSize, curBlock*BlockSize + BlockSize - 1),
-        littleEndian
-      )
-    else:
-      input.unmarshal(
-        message.toOpenArrayByte(curBlock*BlockSize, curBlock*BlockSize + BlockSize - 1),
-        littleEndian
-      )
+    input.unmarshal(
+      message.toOpenArray(curBlock*BlockSize, curBlock*BlockSize + BlockSize - 1),
+      littleEndian)
     input.setBit(8*blockSize) # range [2¹²⁸, 2¹²⁸+2¹²⁸-1)
     acc += input              # range [2¹²⁸, 2¹³⁰-1+2¹²⁸+2¹²⁸-1)
     t.prod(acc, r)            # range [2²⁵⁶, (2¹²⁴-1)(2¹³⁰+2(2¹²⁸-1)))
-    
+
     acc.limbs.partialReduce_1305(t.limbs)
 
   return BlockSize * numBlocks.uint
@@ -213,7 +206,7 @@ func init*(ctx: var Poly1305_CTX, nonReusedKey: array[32, byte]) =
   ## nonReusedKey is an unique not-reused pre-shared key
   ## between the parties that want to authenticate messages between each other
   ctx.acc.setZero()
-  
+
   const clamp = BigInt[128].fromHex"0x0ffffffc0ffffffc0ffffffc0fffffff"
   ctx.r.unmarshal(nonReusedKey.toOpenArray(0, 15), littleEndian)
   staticFor i, 0, ctx.r.limbs.len:
@@ -224,7 +217,7 @@ func init*(ctx: var Poly1305_CTX, nonReusedKey: array[32, byte]) =
   ctx.msgLen = 0
   ctx.bufIdx = 0
 
-func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
+func update*(ctx: var Poly1305_CTX, message: openArray[byte]) {.genCharAPI.} =
   ## Append a message to a Poly1305 authentication context.
   ## for incremental Poly1305 computation
   ##
@@ -246,7 +239,7 @@ func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
   var # Message processing state machine
     cur = 0'u
     bytesLeft = message.len.uint
-  
+
   ctx.msgLen += bytesLeft
 
   if ctx.bufIdx != 0: # Previous partial update
@@ -255,21 +248,21 @@ func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
 
     if free > bytesLeft:
       # Enough free space, store in buffer
-      ctx.buf.copy(dStart = bufIdx, message, sStart = 0, len = bytesLeft)
+      ctx.buf.rawCopy(dStart = bufIdx, message, sStart = 0, len = bytesLeft)
       ctx.bufIdx += bytesLeft.uint8
       return
     else:
       # Fill the buffer and do one Poly1305 MAC
-      ctx.buf.copy(dStart = bufIdx, message, sStart = 0, len = free)
+      ctx.buf.rawCopy(dStart = bufIdx, message, sStart = 0, len = free)
       ctx.macBuffer(blockSize = BlockSize)
 
       # Update message state for further processing
       cur = free
       bytesLeft -= free
-  
+
   # Process n blocks (16 bytes each)
   let consumed = ctx.acc.macMessageBlocks(
-    ctx.r, 
+    ctx.r,
     message.toOpenArray(int cur, message.len-1),
     blockSize = BlockSize
   )
@@ -282,7 +275,7 @@ func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
       doAssert ctx.bufIdx == 0
       doAssert cur + bytesLeft == message.len.uint
 
-    ctx.buf.copy(dStart = 0'u, message, sStart = cur, len = bytesLeft)
+    ctx.buf.rawCopy(dStart = 0'u, message, sStart = cur, len = bytesLeft)
     ctx.bufIdx = uint8 bytesLeft
 
 func finish*(ctx: var Poly1305_CTX, tag: var array[16, byte]) =
@@ -305,7 +298,7 @@ func finish*(ctx: var Poly1305_CTX, tag: var array[16, byte]) =
   # Input is only partially reduced to [0, 2¹³⁰)
   # Map it to [0, 2¹³⁰-5)
   ctx.acc.limbs.finalReduce_1305()
-  
+
   # Starting from now, we only care about the 128 least significant bits
   var acc128{.noInit.}: BigInt[128]
   acc128.copyTruncatedFrom(ctx.acc)
@@ -328,15 +321,15 @@ func clear*(ctx: var Poly1305_CTX) =
   ctx.msgLen = 0
   ctx.bufIdx = 0
 
-func mac*[T: char|byte](
+func mac*(
        _: type poly1305,
        tag: var array[16, byte],
-       message: openArray[T],
+       message: openArray[byte],
        nonReusedKey: array[32, byte],
-       clearMem = false) =
+       clearMem = false) {.genCharAPI.} =
   ## Produce an authentication tag from a message
   ## and a preshared unique non-reused secret key
-  
+
   var ctx {.noInit.}: poly1305
   ctx.init(nonReusedKey)
   ctx.update(message)
@@ -345,11 +338,11 @@ func mac*[T: char|byte](
   if clearMem:
     ctx.clear()
 
-func mac*[T: char|byte](
+func mac*(
        _: type poly1305,
-       message: openArray[T],
+       message: openArray[byte],
        nonReusedKey: array[32, byte],
-       clearMem = false): array[16, byte]{.noInit.}=
+       clearMem = false): array[16, byte]{.noInit, genCharAPI.}=
   ## Produce an authentication tag from a message
   ## and a preshared unique non-reused secret key
   poly1305.mac(result, message, nonReusedKey, clearMem)
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_modular_dbl_prec_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_modular_dbl_prec_x86.nim
index d8c91b3f0..1d64685de 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_modular_dbl_prec_x86.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_modular_dbl_prec_x86.nim
@@ -26,7 +26,8 @@ import
 # and so FpDbl would 768 bits.
 
 static: doAssert UseASM_X86_64
-{.localPassC:"-fomit-frame-pointer".} # Needed so that the compiler finds enough registers
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
 
 # Double-precision field addition
 # ------------------------------------------------------------
@@ -93,7 +94,7 @@ macro addmod2x_gen[N: static int](R: var Limbs[N], A, B: Limbs[N], m: Limbs[N di
 
   result.add ctx.generate
 
-func addmod2x_asm*[N: static int](r: var Limbs[N], a, b: Limbs[N], M: Limbs[N div 2]) =
+func addmod2x_asm*[N: static int](r: var Limbs[N], a, b: Limbs[N], M: Limbs[N div 2]) {.noInline.} =
   ## Constant-time double-precision addition
   ## Output is conditionally reduced by 2ⁿp
   ## to stay in the [0, 2ⁿp) range
@@ -159,7 +160,7 @@ macro submod2x_gen[N: static int](R: var Limbs[N], A, B: Limbs[N], m: Limbs[N di
 
   result.add ctx.generate
 
-func submod2x_asm*[N: static int](r: var Limbs[N], a, b: Limbs[N], M: Limbs[N div 2]) =
+func submod2x_asm*[N: static int](r: var Limbs[N], a, b: Limbs[N], M: Limbs[N div 2]) {.noInline.} =
   ## Constant-time double-precision substraction
   ## Output is conditionally reduced by 2ⁿp
   ## to stay in the [0, 2ⁿp) range
@@ -233,6 +234,6 @@ macro negmod2x_gen[N: static int](R: var Limbs[N], A: Limbs[N], m: Limbs[N div 2
     var `usym`{.noinit, used.}: typeof(`A`)
   result.add ctx.generate
 
-func negmod2x_asm*[N: static int](r: var Limbs[N], a: Limbs[N], M: Limbs[N div 2]) =
+func negmod2x_asm*[N: static int](r: var Limbs[N], a: Limbs[N], M: Limbs[N div 2]) {.noInline.} =
   ## Constant-time double-precision negation
   negmod2x_gen(r, a, M)
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_modular_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_modular_x86.nim
index 73cb3c033..76a13c49f 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_modular_x86.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_modular_x86.nim
@@ -25,22 +25,22 @@ import
 
 static: doAssert UseASM_X86_32
 
-{.localPassC:"-fomit-frame-pointer".} # Needed so that the compiler finds enough registers
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
 
 proc finalSubNoOverflowImpl*(
        ctx: var Assembler_x86,
        r: Operand or OperandArray,
-       a, M, scratch: OperandArray
-     ) =
+       a, M, scratch: OperandArray) =
   ## Reduce `a` into `r` modulo `M`
   ## To be used when the modulus does not use the full bitwidth of the storing words
   ## for example a 255-bit modulus in n words of total max size 2^256
-  ## 
+  ##
   ## r, a, scratch, scratchReg are mutated
   ## M is read-only
   let N = M.len
   ctx.comment "Final substraction (cannot overflow its limbs)"
-  
+
   # Substract the modulus, and test a < p with the last borrow
   ctx.mov scratch[0], a[0]
   ctx.sub scratch[0], M[0]
@@ -58,12 +58,11 @@ proc finalSubMayOverflowImpl*(
        ctx: var Assembler_x86,
        r: Operand or OperandArray,
        a, M, scratch: OperandArray,
-       scratchReg: Operand or Register or OperandReuse
-     ) =
+       scratchReg: Operand or Register or OperandReuse) =
   ## Reduce `a` into `r` modulo `M`
   ## To be used when the final substraction can
   ## also overflow the limbs (a 2^256 order of magnitude modulus stored in n words of total max size 2^256)
-  ## 
+  ##
   ## r, a, scratch, scratchReg are mutated
   ## M is read-only
   let N = M.len
@@ -97,7 +96,7 @@ macro finalSub_gen*[N: static int](
   ## Returns:
   ##   a-M if a > M
   ##   a otherwise
-  ## 
+  ##
   ## - r_PIR is a pointer to the result array, mutated,
   ## - a_EIR is an array of registers, mutated,
   ## - M_PIR is a pointer to an array, read-only,
@@ -173,8 +172,9 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], spareBits: s
 
   result.add ctx.generate()
 
-func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) =
+func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) {.noInline.} =
   ## Constant-time modular addition
+  # This MUST be noInline or Clang will run out of registers with LTO
   addmod_gen(r, a, b, m, spareBits)
 
 # Field substraction
@@ -233,9 +233,10 @@ macro submod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N]): untyped =
 
   result.add ctx.generate
 
-func submod_asm*(r: var Limbs, a, b, M: Limbs) =
+func submod_asm*(r: var Limbs, a, b, M: Limbs) {.noInline.} =
   ## Constant-time modular substraction
   ## Warning, does not handle aliasing of a and b
+  # This MUST be noInline or Clang will run out of registers with LTO
   submod_gen(r, a, b, M)
 
 # Field negation
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
index 402b31bda..e376b9354 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
@@ -28,8 +28,9 @@ import
 
 static: doAssert UseASM_X86_64
 
-# Necessary for the compiler to find enough registers (enabled at -O1)
-{.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
+{.localPassC:"-fno-sanitize=address".} # need 15 registers out of 16 (1 reserved for stack pointer, none available for Address Sanitizer)
 
 # Montgomery multiplication
 # ------------------------------------------------------------
@@ -37,8 +38,7 @@ static: doAssert UseASM_X86_64
 macro mulMont_CIOS_sparebit_gen[N: static int](
         r_PIR: var Limbs[N], a_PIR, b_PIR,
         M_PIR: Limbs[N], m0ninv_REG: BaseType,
-        skipFinalSub: static bool
-      ): untyped =
+        skipFinalSub: static bool): untyped =
   ## Generate an optimized Montgomery Multiplication kernel
   ## using the CIOS method
   ##
@@ -184,26 +184,19 @@ macro mulMont_CIOS_sparebit_gen[N: static int](
     )
   result.add ctx.generate()
 
-func mulMont_CIOS_sparebit_asm*(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) =
+func mulMont_CIOS_sparebit_asm*(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) {.noInline.} =
   ## Constant-time Montgomery multiplication
   ## If "skipFinalSub" is set
   ## the result is in the range [0, 2M)
   ## otherwise the result is in the range [0, M)
-  ## 
+  ##
   ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
+  # This MUST be noInline or Clang will run out of registers with LTO
   r.mulMont_CIOS_sparebit_gen(a, b, M, m0ninv, skipFinalSub)
 
 # Montgomery Squaring
 # ------------------------------------------------------------
 
-func square_asm_inline[rLen, aLen: static int](r: var Limbs[rLen], a: Limbs[aLen]) {.inline.} =
-  ## Multi-precision Squaring
-  ## Assumes r doesn't alias a
-  ## Extra indirection as the generator assumes that
-  ## arrays are pointers, which is true for parameters
-  ## but not for stack variables
-  sqr_gen(r, a)
-
 func squareMont_CIOS_asm*[N](
        r: var Limbs[N],
        a, M: Limbs[N],
@@ -211,8 +204,8 @@ func squareMont_CIOS_asm*[N](
        spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
-  r2x.square_asm_inline(a)
-  r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
+  square_asm(r2x, a)
+  r.redcMont_asm(r2x, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery Sum of Products
 # ------------------------------------------------------------
@@ -220,11 +213,10 @@ func squareMont_CIOS_asm*[N](
 macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
         r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
         M_PIR: Limbs[N], m0ninv_REG: BaseType,
-        skipFinalSub: static bool
-      ): untyped =
+        skipFinalSub: static bool): untyped =
   ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
   ## using the CIOS method
-  ## 
+  ##
   ## This requires 2 spare bits in the most significant word
   ## so that we can skip the intermediate reductions
 
@@ -276,7 +268,7 @@ macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
     tN = scratch[2]                                  # High part of extended precision multiplication
     C = scratch[3]                                   # Carry during reduction step
     r = scratch[4]                                   # Stores the `r` operand
-    S = scratch[5]                                   # Mul step: Stores the carry A 
+    S = scratch[5]                                   # Mul step: Stores the carry A
                                                      # Red step: Stores (t[0] * m0ninv) mod 2ʷ
 
   # Registers used:
@@ -338,7 +330,7 @@ macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
         ctx.add t[0], rax
         ctx.adc rdx, 0
       ctx.mov A, rdx
-      
+
       for j in 1 ..< N:
         ctx.comment "        (A,t[j])  := t[j] + a[k][j]*b[k][i] + A"
         ctx.mov rax, a[k, j]
@@ -351,7 +343,7 @@ macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
         ctx.`xor` A, A
         ctx.add t[j], rax
         ctx.adc A, rdx
-      
+
       ctx.comment "    tN += A"
       ctx.add tN, A
 
@@ -407,6 +399,6 @@ func sumprodMont_CIOS_spare2bits_asm*[N, K: static int](
   ## If "skipFinalSub" is set
   ## the result is in the range [0, 2M)
   ## otherwise the result is in the range [0, M)
-  ## 
+  ##
   ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
   r.sumprodMont_CIOS_spare2bits_gen(a, b, M, m0ninv, skipFinalSub)
\ No newline at end of file
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
index d602bf6dd..04c74fdd0 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
@@ -30,8 +30,9 @@ static: doAssert UseASM_X86_64
 
 # MULX/ADCX/ADOX
 {.localPassC:"-madx -mbmi2".}
-# Necessary for the compiler to find enough registers (enabled at -O1)
-{.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
+{.localPassC:"-fno-sanitize=address".} # need 15 registers out of 16 (1 reserved for stack pointer, none available for Address Sanitizer)
 
 # Montgomery Multiplication
 # ------------------------------------------------------------
@@ -42,8 +43,7 @@ proc mulx_by_word(
        t: OperandArray,
        a: Operand, # Pointer in scratchspace
        word0: Operand,
-       lo: Operand
-     ) =
+       lo: Operand) =
   ## Multiply the `a[0..<N]` by `word` and store in `t[0..<N]`
   ## and carry register `C` (t[N])
   ## `t` and `C` overwritten
@@ -89,8 +89,7 @@ proc mulaccx_by_word(
        a: Operand, # Pointer in scratchspace
        i: int,
        word: Operand,
-       lo: Operand
-     ) =
+       lo: Operand) =
   ## Multiply the `a[0..<N]` by `word`
   ## and accumulate in `t[0..<N]`
   ## and carry register `C` (t[N])
@@ -131,8 +130,7 @@ proc partialRedx(
        M: OperandArray,
        m0ninv: Operand,
        lo: Operand or Register,
-       S: Operand
-     ) =
+       S: Operand) =
     ## Partial Montgomery reduction
     ## For CIOS method
     ## `C` the update carry flag (represents t[N])
@@ -284,7 +282,7 @@ func mulMont_CIOS_sparebit_asm_adx_inline*(r: var Limbs, a, b, M: Limbs, m0ninv:
   ## If "skipFinalSub" is set
   ## the result is in the range [0, 2M)
   ## otherwise the result is in the range [0, M)
-  ## 
+  ##
   ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
   r.mulMont_CIOS_sparebit_adx_gen(a, b, M, m0ninv, skipFinalSub)
 
@@ -293,7 +291,7 @@ func mulMont_CIOS_sparebit_asm_adx*(r: var Limbs, a, b, M: Limbs, m0ninv: BaseTy
   ## If "skipFinalSub" is set
   ## the result is in the range [0, 2M)
   ## otherwise the result is in the range [0, M)
-  ## 
+  ##
   ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
   r.mulMont_CIOS_sparebit_asm_adx_inline(a, b, M, m0ninv, skipFinalSub)
 
@@ -307,7 +305,7 @@ func squareMont_CIOS_asm_adx*[N](
        spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
-  r2x.square_asm_adx_inline(a)
+  r2x.square_asm_adx(a)
   r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery Sum of Products
@@ -316,11 +314,10 @@ func squareMont_CIOS_asm_adx*[N](
 macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
         r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
         M_PIR: Limbs[N], m0ninv_REG: BaseType,
-        skipFinalSub: static bool
-      ): untyped =
+        skipFinalSub: static bool): untyped =
   ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
   ## using the CIOS method
-  ## 
+  ##
   ## This requires 2 spare bits in the most significant word
   ## so that we can skip the intermediate reductions
 
@@ -372,7 +369,7 @@ macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
     tN = scratch[2]                                  # High part of extended precision multiplication
     C = scratch[3]                                   # Carry during reduction step
     r = scratch[4]                                   # Stores the `r` operand
-    S = scratch[5]                                   # Mul step: Stores the carry A 
+    S = scratch[5]                                   # Mul step: Stores the carry A
                                                      # Red step: Stores (t[0] * m0ninv) mod 2ʷ
 
   # Registers used:
@@ -433,7 +430,7 @@ macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
         ctx.mulx A, rax, a[k, 0], rdx
         ctx.adcx t[0], rax
         ctx.adox t[1], A
-      
+
       for j in 1 ..< N-1:
         ctx.comment "        (A,t[j])  := t[j] + a[k][j]*b[k][i] + A"
         if i == 0 and k == 0:
@@ -449,7 +446,7 @@ macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
 
       # Last limb
       ctx.mulx A, rax, a[k, N-1], rdx
-      if i == 0 and k == 0:  
+      if i == 0 and k == 0:
         ctx.adc t[N-1], rax
         ctx.comment "    tN += A"
         ctx.adc tN, A
@@ -490,6 +487,6 @@ func sumprodMont_CIOS_spare2bits_asm_adx*[N, K: static int](
   ## If "skipFinalSub" is set
   ## the result is in the range [0, 2M)
   ## otherwise the result is in the range [0, M)
-  ## 
+  ##
   ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
   r.sumprodMont_CIOS_spare2bits_adx_gen(a, b, M, m0ninv, skipFinalSub)
\ No newline at end of file
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_x86_adx_bmi2.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_x86_adx_bmi2.nim
index 7ad80fb27..a22deaff2 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_mul_x86_adx_bmi2.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_x86_adx_bmi2.nim
@@ -27,8 +27,8 @@ static: doAssert UseASM_X86_64
 
 # MULX/ADCX/ADOX
 {.localPassC:"-madx -mbmi2".}
-# Necessary for the compiler to find enough registers (enabled at -O1)
-# {.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+# {.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
 
 # Multiplication
 # ------------------------------------------------------------
@@ -36,8 +36,7 @@ proc mulx_by_word(
        ctx: var Assembler_x86,
        r0: Operand,
        a, t: OperandArray,
-       word0: Operand
-     ) =
+       word0: Operand) =
   ## Multiply the `a[0..<N]` by `word`
   ## and store in `[t[n..1]:r0]`
   ## with [t[n..1]:r0] = tn, tn-1, ... t1, r0
@@ -74,8 +73,7 @@ proc mulaccx_by_word(
        r: OperandArray,
        i: int,
        a, t: OperandArray,
-       word: Operand
-     ) =
+       word: Operand) =
   ## Multiply the `a[0..<N]` by `word`
   ## and store in `[t[n..0]:r0]`
   ## with [t[n..0]:r0] = tn, tn-1, ... t1, r0
@@ -603,12 +601,7 @@ macro sqrx_gen*[rLen, aLen: static int](r_PIR: var Limbs[rLen], a_PIR: Limbs[aLe
   # Codegen
   result.add ctx.generate
 
-func square_asm_adx_inline*[rLen, aLen: static int](r: var Limbs[rLen], a: Limbs[aLen]) {.inline.} =
-  ## Multi-precision Squaring
-  ## inline version
-  sqrx_gen(r, a)
-
 func square_asm_adx*[rLen, aLen: static int](r: var Limbs[rLen], a: Limbs[aLen]) =
   ## Multi-precision Squaring
   ## Assumes r doesn't alias a
-  square_asm_adx_inline(r, a)
+  sqrx_gen(r, a)
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
index ea1412d04..10f20c5d7 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
@@ -22,8 +22,8 @@ import
 
 static: doAssert UseASM_X86_32
 
-# Necessary for the compiler to find enough registers (enabled at -O1)
-{.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".}  # (enabled at -O1)
 
 # Montgomery reduction
 # ------------------------------------------------------------
@@ -33,9 +33,7 @@ macro redc2xMont_gen*[N: static int](
        a_PIR: array[N*2, SecretWord],
        M_PIR: array[N, SecretWord],
        m0ninv_REG: BaseType,
-       spareBits: static int, skipFinalSub: static bool
-      ) =
-
+       spareBits: static int, skipFinalSub: static bool) =
   # No register spilling handling
   doAssert N > 2, "The Assembly-optimized montgomery reduction requires a minimum of 2 limbs."
   doAssert N <= 6, "The Assembly-optimized montgomery reduction requires at most 6 limbs."
@@ -152,7 +150,7 @@ macro redc2xMont_gen*[N: static int](
 
   # v is invalidated from now on
   let t = repackRegisters(v, u[N], u[N+1])
-  
+
   if spareBits >= 2 and skipFinalSub:
     for i in 0 ..< N:
       ctx.mov r_temp[i], u[i]
@@ -164,29 +162,17 @@ macro redc2xMont_gen*[N: static int](
   # Code generation
   result.add ctx.generate()
 
-func redcMont_asm_inline*[N: static int](
-       r: var array[N, SecretWord],
-       a: array[N*2, SecretWord],
-       M: array[N, SecretWord],
-       m0ninv: BaseType,
-       spareBits: static int,
-       skipFinalSub: static bool = false
-      ) {.inline.} =
-  ## Constant-time Montgomery reduction
-  ## Inline-version
-  redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
-
 func redcMont_asm*[N: static int](
        r: var array[N, SecretWord],
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
        spareBits: static int,
-       skipFinalSub: static bool
-      ) =
+       skipFinalSub: static bool) {.noInline.}  =
   ## Constant-time Montgomery reduction
+  # This MUST be noInline or Clang will run out of registers with LTO
   static: doAssert UseASM_X86_64, "This requires x86-64."
-  redcMont_asm_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
+  redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery conversion
 # ----------------------------------------------------------
@@ -230,7 +216,7 @@ macro mulMont_by_1_gen[N: static int](
     m = scratch[1] # Stores (t[0] * m0ninv) mod 2ʷ
 
   let scratchSym = scratch.nimSymbol
-  
+
   # Copy a in t
   result.add quote do:
     var `scratchSym` {.noInit, used.}: Limbs[`scratchSlots`]
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim b/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
index 10b2e3b60..11d67d800 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
@@ -23,8 +23,8 @@ static: doAssert UseASM_X86_64
 
 # MULX/ADCX/ADOX
 {.localPassC:"-madx -mbmi2".}
-# Necessary for the compiler to find enough registers (enabled at -O1)
-{.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".} # (enabled at -O1)
 
 # No exceptions allowed
 {.push raises: [].}
@@ -37,8 +37,7 @@ macro redc2xMont_adx_gen[N: static int](
        a_PIR: array[N*2, SecretWord],
        M_PIR: array[N, SecretWord],
        m0ninv_REG: BaseType,
-       spareBits: static int, skipFinalSub: static bool
-      ) =
+       spareBits: static int, skipFinalSub: static bool) =
 
   # No register spilling handling
   doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
@@ -141,28 +140,18 @@ macro redc2xMont_adx_gen[N: static int](
   # Code generation
   result.add ctx.generate()
 
-func redcMont_asm_adx_inline*[N: static int](
-       r: var array[N, SecretWord],
-       a: array[N*2, SecretWord],
-       M: array[N, SecretWord],
-       m0ninv: BaseType,
-       spareBits: static int,
-       skipFinalSub: static bool = false
-      ) {.inline.} =
-  ## Constant-time Montgomery reduction
-  ## Inline-version
-  redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
-
 func redcMont_asm_adx*[N: static int](
        r: var array[N, SecretWord],
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
        spareBits: static int,
-       skipFinalSub: static bool = false
-      ) =
+       skipFinalSub: static bool = false) {.noInline.} =
   ## Constant-time Montgomery reduction
-  redcMont_asm_adx_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
+  # Inlining redcMont_asm_adx twice in mul_fp2_complex_asm_adx
+  # causes GCC to miscompile with -Os (--opt:size)
+  # see https://github.com/mratsim/constantine/issues/229
+  redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery conversion
 # ----------------------------------------------------------
@@ -205,7 +194,7 @@ macro mulMont_by_1_adx_gen[N: static int](
     C = scratch[0] # Stores the high-part of muliplication
 
   let scratchSym = scratch.nimSymbol
-  
+
   # Copy a in t
   result.add quote do:
     var `scratchSym` {.noInit, used.}: Limbs[`scratchSlots`]
diff --git a/constantine/math/arithmetic/finite_fields.nim b/constantine/math/arithmetic/finite_fields.nim
index 3719f62f5..fbeba1fee 100644
--- a/constantine/math/arithmetic/finite_fields.nim
+++ b/constantine/math/arithmetic/finite_fields.nim
@@ -249,8 +249,7 @@ func sumprod*[N: static int](r: var FF, a, b: array[N, FF], skipFinalSub: static
   r.mres.sumprodMont(
     cast[ptr array[N, typeof(a[0].mres)]](a.unsafeAddr)[],
     cast[ptr array[N, typeof(b[0].mres)]](b.unsafeAddr)[],
-    FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub
-  )
+    FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub)
 
 # ############################################################
 #
diff --git a/constantine/math/arithmetic/limbs_exgcd.nim b/constantine/math/arithmetic/limbs_exgcd.nim
index 202b81126..a6445b702 100644
--- a/constantine/math/arithmetic/limbs_exgcd.nim
+++ b/constantine/math/arithmetic/limbs_exgcd.nim
@@ -117,7 +117,6 @@ debug:
       r = SecretWord r
 
     var a, b: array[2, SecretWord]
-    var e: array[2, SecretWord]
     smul(a[1], a[0], u, r)
     smul(b[1], b[0], v, q)
 
@@ -373,8 +372,8 @@ template matVecMul_shr_k_impl(
 
   # First iteration of [u v] [f]
   #                    [q r].[g]
-  cf.ssumprodAccNoCarry(u, f[0], v, g[0])
-  cg.ssumprodAccNoCarry(q, f[0], r, g[0])
+  ssumprodAccNoCarry(cf, u, f[0], v, g[0])
+  ssumprodAccNoCarry(cg, q, f[0], r, g[0])
   # bottom k bits are zero by construction
   debug:
     doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo)
@@ -384,8 +383,8 @@ template matVecMul_shr_k_impl(
   cg.ashr(k)
 
   for i in 1 ..< numLimbsLeft:
-    cf.ssumprodAccNoCarry(u, f[i], v, g[i])
-    cg.ssumprodAccNoCarry(q, f[i], r, g[i])
+    ssumprodAccNoCarry(cf, u, f[i], v, g[i])
+    ssumprodAccNoCarry(cg, q, f[i], r, g[i])
     f[i-1] = cf.lo and Max
     g[i-1] = cg.lo and Max
     cf.ashr(k)
diff --git a/constantine/math/arithmetic/limbs_montgomery.nim b/constantine/math/arithmetic/limbs_montgomery.nim
index b3596a8fd..b26b5e9b7 100644
--- a/constantine/math/arithmetic/limbs_montgomery.nim
+++ b/constantine/math/arithmetic/limbs_montgomery.nim
@@ -56,11 +56,11 @@ func redc2xMont_CIOS[N: static int](
        M: array[N, SecretWord],
        m0ninv: BaseType, skipFinalSub: static bool = false) =
   ## Montgomery reduce a double-precision bigint modulo M
-  ## 
+  ##
   ## This maps
   ## - [0, 4p²) -> [0, 2p) with skipFinalSub
   ## - [0, 4p²) -> [0, p) without
-  ## 
+  ##
   ## skipFinalSub skips the final substraction step.
   # - Analyzing and Comparing Montgomery Multiplication Algorithms
   #   Cetin Kaya Koc and Tolga Acar and Burton S. Kaliski Jr.
@@ -125,11 +125,11 @@ func redc2xMont_Comba[N: static int](
        M: array[N, SecretWord],
        m0ninv: BaseType, skipFinalSub: static bool = false) {.used.} =
   ## Montgomery reduce a double-precision bigint modulo M
-  ## 
+  ##
   ## This maps
   ## - [0, 4p²) -> [0, 2p) with skipFinalSub
   ## - [0, 4p²) -> [0, p) without
-  ## 
+  ##
   ## skipFinalSub skips the final substraction step.
   # We use Product Scanning / Comba multiplication
   var t, u, v = Zero
@@ -179,11 +179,11 @@ func mulMont_CIOS_sparebit(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipF
   ## This requires the most significant word of the Modulus
   ##   M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
   ## https://hackmd.io/@gnark/modular_multiplication
-  ## 
+  ##
   ## This maps
   ## - [0, 2p) -> [0, 2p) with skipFinalSub
   ## - [0, 2p) -> [0, p) without
-  ## 
+  ##
   ## skipFinalSub skips the final substraction step.
 
   # We want all the computation to be kept in registers
@@ -262,11 +262,11 @@ func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} =
 
 func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) =
   ## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS)
-  ## 
+  ##
   ## This maps
   ## - [0, 2p) -> [0, 2p) with skipFinalSub
   ## - [0, 2p) -> [0, p) without
-  ## 
+  ##
   ## skipFinalSub skips the final substraction step.
   # - Architectural Enhancements for Montgomery
   #   Multiplication on Embedded RISC Processors
@@ -310,11 +310,11 @@ func sumprodMont_CIOS_spare2bits[K: static int](
        skipFinalSub: static bool = false) =
   ## Compute r = ⅀aᵢ.bᵢ (mod M) (suim of products)
   ## This requires 2 unused bits in the field element representation
-  ## 
+  ##
   ## This maps
   ## - [0, 2p) -> [0, 2p) with skipFinalSub
   ## - [0, 2p) -> [0, p) without
-  ## 
+  ##
   ## skipFinalSub skips the final substraction step.
 
   # We want all the computation to be kept in registers
@@ -398,7 +398,7 @@ func sumprodMont_CIOS_spare2bits[K: static int](
 
 # Montgomery Conversion
 # ------------------------------------------------------------
-# 
+#
 # In Montgomery form, inputs are scaled by a constant R
 # so a' = aR (mod p) and b' = bR (mod p)
 #
@@ -453,7 +453,7 @@ func redc2xMont*[N: static int](
        m0ninv: BaseType,
        spareBits: static int, skipFinalSub: static bool = false) {.inline.} =
   ## Montgomery reduce a double-precision bigint modulo M
-  
+
   const skipFinalSub = skipFinalSub and spareBits >= 2
 
   when UseASM_X86_64 and r.len <= 6:
@@ -543,14 +543,17 @@ func sumprodMont*[N: static int](
         r: var Limbs, a, b: array[N, Limbs],
         M: Limbs, m0ninv: BaseType,
         spareBits: static int,
-        skipFinalSub: static bool = false) {.inline.} =
+        skipFinalSub: static bool = false) {.noInline.} =
+  ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products)
+  # This function must be noInline or GCC miscompiles
+  # with LTO, see https://github.com/mratsim/constantine/issues/230
   when spareBits >= 2:
     when UseASM_X86_64 and r.len in {2 .. 6}:
       if ({.noSideEffect.}: hasAdx()):
         r.sumprodMont_CIOS_spare2bits_asm_adx(a, b, M, m0ninv, skipFinalSub)
       else:
         r.sumprodMont_CIOS_spare2bits_asm(a, b, M, m0ninv, skipFinalSub)
-    else:  
+    else:
       r.sumprodMont_CIOS_spare2bits(a, b, M, m0ninv, skipFinalSub)
   else:
     r.mulMont(a[0], b[0], M, m0ninv, spareBits, skipFinalSub = false)
@@ -719,7 +722,7 @@ func powMontSquarings(
 
   # We have k bits and can do k squaring
   for i in 0 ..< k:
-    a.squareMont(a, M, m0ninv, spareBits)  
+    a.squareMont(a, M, m0ninv, spareBits)
 
   return (k, bits)
 
diff --git a/constantine/math/config/curves_prop_field_core.nim b/constantine/math/config/curves_prop_field_core.nim
index d4ad63548..668c2784c 100644
--- a/constantine/math/config/curves_prop_field_core.nim
+++ b/constantine/math/config/curves_prop_field_core.nim
@@ -45,10 +45,6 @@ func has_P_3mod4_primeModulus*(C: static Curve): static bool =
   ## Returns true iff p ≡ 3 (mod 4)
   (BaseType(C.Mod.limbs[0]) and 3) == 3
 
-func has_P_3mod8_primeModulus*(C: static Curve): static bool =
-  ## Returns true iff p ≡ 3 (mod 8)
-  (BaseType(C.Mod.limbs[0]) and 7) == 3
-
 func has_P_5mod8_primeModulus*(C: static Curve): static bool =
   ## Returns true iff p ≡ 5 (mod 8)
   (BaseType(C.Mod.limbs[0]) and 7) == 5
diff --git a/constantine/math/extension_fields/assembly/fp2_asm_x86_adx_bmi2.nim b/constantine/math/extension_fields/assembly/fp2_asm_x86_adx_bmi2.nim
index 04f5811eb..938812e5e 100644
--- a/constantine/math/extension_fields/assembly/fp2_asm_x86_adx_bmi2.nim
+++ b/constantine/math/extension_fields/assembly/fp2_asm_x86_adx_bmi2.nim
@@ -28,8 +28,8 @@ static: doAssert UseASM_X86_64
 
 # MULX/ADCX/ADOX
 {.localPassC:"-madx -mbmi2".}
-# Necessary for the compiler to find enough registers (enabled at -O1)
-{.localPassC:"-fomit-frame-pointer".}
+# Necessary for the compiler to find enough registers
+{.localPassC:"-fomit-frame-pointer".} # (enabled at -O1)
 
 # No exceptions allowed
 {.push raises: [].}
@@ -48,8 +48,7 @@ func has1extraBit(F: type Fp): bool =
 
 func sqrx2x_complex_asm_adx*(
         r: var array[2, FpDbl],
-        a: array[2, Fp]
-      ) =
+        a: array[2, Fp]) =
   ## Complex squaring on 𝔽p2
   # This specialized proc inlines all calls and avoids many ADX support checks.
   # and push/pop for paramater passing.
@@ -69,8 +68,7 @@ func sqrx2x_complex_asm_adx*(
 
 func sqrx_complex_sparebit_asm_adx*(
         r: var array[2, Fp],
-        a: array[2, Fp]
-      ) =
+        a: array[2, Fp]) =
   ## Complex squaring on 𝔽p2
   # This specialized proc inlines all calls and avoids many ADX support checks.
   # and push/pop for paramater passing.
@@ -91,8 +89,7 @@ func sqrx_complex_sparebit_asm_adx*(
 
 func mul2x_fp2_complex_asm_adx*(
         r: var array[2, FpDbl],
-        a, b: array[2, Fp]
-      ) =
+        a, b: array[2, Fp]) =
   ## Complex multiplication on 𝔽p2
   var D {.noInit.}: typeof(r.c0)
   var t0 {.noInit.}, t1 {.noInit.}: typeof(a.c0)
@@ -121,15 +118,15 @@ func mul_fp2_complex_asm_adx*(
   ## Complex multiplication on 𝔽p2
   var d {.noInit.}: array[2,doublePrec(Fp)]
   d.mul2x_fp2_complex_asm_adx(a, b)
-  r.c0.mres.limbs.redcMont_asm_adx_inline(
+  # Inlining redcMont_asm_adx causes GCC to miscompile with -Os (--opt:size)
+  # see https://github.com/mratsim/constantine/issues/229
+  r.c0.mres.limbs.redcMont_asm_adx(
     d.c0.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.getSpareBits()
-  )
-  r.c1.mres.limbs.redcMont_asm_adx_inline(
+    Fp.getSpareBits())
+  r.c1.mres.limbs.redcMont_asm_adx(
     d.c1.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.getSpareBits()
-  )
+    Fp.getSpareBits())
diff --git a/constantine/math/extension_fields/exponentiations.nim b/constantine/math/extension_fields/exponentiations.nim
index 40bf6199e..c8b32aff2 100644
--- a/constantine/math/extension_fields/exponentiations.nim
+++ b/constantine/math/extension_fields/exponentiations.nim
@@ -43,7 +43,7 @@ func getWindowLen(bufLen: int): uint =
 func powPrologue[F](a: var F, scratchspace: var openarray[F]): uint =
   ## Setup the scratchspace, then set a to 1.
   ## Returns the fixed-window size for exponentiation with window optimization
-  result = scratchspace.len.getWindowLen
+  result = scratchspace.len.getWindowLen()
   # Precompute window content, special case for window = 1
   # (i.e scratchspace has only space for 2 temporaries)
   # The content scratchspace[2+k] is set at [k]P
@@ -62,8 +62,7 @@ func powSquarings[F](
        tmp: var F,
        window: uint,
        acc, acc_len: var uint,
-       e: var int
-     ): tuple[k, bits: uint] {.inline.}=
+       e: var int): tuple[k, bits: uint] {.inline.}=
   ## Squaring step of exponentiation by squaring
   ## Get the next k bits in range [1, window)
   ## Square k times
@@ -105,8 +104,7 @@ func powSquarings[F](
 func powUnsafeExponent[F](
        a: var F,
        exponent: openArray[byte],
-       scratchspace: var openArray[F]
-     ) =
+       scratchspace: var openArray[F]) =
   ## Extension field exponentiation r = a^exponent (mod p^m)
   ##
   ## Warning ⚠️ :
diff --git a/constantine/math/extension_fields/towers.nim b/constantine/math/extension_fields/towers.nim
index 4015ecf29..71bcbc6c5 100644
--- a/constantine/math/extension_fields/towers.nim
+++ b/constantine/math/extension_fields/towers.nim
@@ -979,12 +979,17 @@ func square2x_disjoint*[Fdbl, F](
 # Multiplications (specializations)
 # -------------------------------------------------------------------
 
-func prodImpl_fp4o2_p3mod8[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) =
+func prodImpl_fp4o2_complex_snr_1pi[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) =
   ## Returns r = a * b
-  ## For 𝔽p4/𝔽p2 with p ≡ 3 (mod 8),
-  ##   hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
-  ##   and 𝔽p SNR is (1 + i)
-  static: doAssert C.has_P_3mod8_primeModulus()
+  ## For 𝔽p4/𝔽p2 with the following non-residue (NR) constraints:
+  ##   * -1 is a quadratic non-residue in 𝔽p hence 𝔽p2 has coordinates a+𝑖b with i = √-1. This implies p ≡ 3 (mod 4)
+  ##   * (1 + i) is a quadratic non-residue in 𝔽p2 hence 𝔽p4 has coordinates a+vb with v = √(1+𝑖).
+  ##
+  ## According to Benger-Scott 2009 (https://eprint.iacr.org/2009/556.pdf)
+  ## About 2/3 of the p ≡ 3 (mod 8) primes are in this case
+  static:
+    doAssert C.getNonResidueFp() == -1
+    doAssert C.getNonresidueFp2() == (1, 1)
   var
     b10_m_b11{.noInit.}, b10_p_b11{.noInit.}: Fp[C]
     n_a01{.noInit.}, n_a11{.noInit.}: Fp[C]
@@ -1374,8 +1379,8 @@ func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
     when QuadraticExt is Fp12 or r.typeof.F.C.has_large_field_elem():
       # BW6-761 requires too many registers for Dbl width path
       r.prod_generic(a, b)
-    elif QuadraticExt is Fp4 and QuadraticExt.C.has_P_3mod8_primeModulus():
-      r.prodImpl_fp4o2_p3mod8(a, b)
+    elif QuadraticExt is Fp4 and QuadraticExt.C.getNonResidueFp() == -1 and QuadraticExt.C.getNonResidueFp2() == (1, 1):
+      r.prodImpl_fp4o2_complex_snr_1pi(a, b)
     else:
       var d {.noInit.}: doublePrec(typeof(r))
       d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)
@@ -1628,13 +1633,18 @@ func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
 # Multiplications (specializations)
 # -------------------------------------------------------------------
 
-func prodImpl_fp6o2_p3mod8[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) =
+func prodImpl_fp6o2_complex_snr_1pi[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) =
   ## Returns r = a * b
-  ## For 𝔽p6/𝔽p2 with p ≡ 3 (mod 8),
-  ##   hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
-  ##   and 𝔽p SNR is (1 + i)
+  ## For 𝔽p6/𝔽p2 with the following non-residue (NR) constraints:
+  ##   * -1 is a quadratic non-residue in 𝔽p hence 𝔽p2 has coordinates a+𝑖b with i = √-1. This implies p ≡ 3 (mod 4)
+  ##   * (1 + i) is a cubic non-residue in 𝔽p2 hence 𝔽p6 has coordinates a+vb+v²c with v = ∛(1+𝑖).
+  ##
+  ## According to Benger-Scott 2009 (https://eprint.iacr.org/2009/556.pdf)
+  ## About 2/3 of the p ≡ 3 (mod 8) primes are in this case
   # https://eprint.iacr.org/2022/367 - Equation 8
-  static: doAssert C.has_P_3mod8_primeModulus()
+  static:
+    doAssert C.getNonResidueFp() == -1
+    doAssert C.getNonresidueFp2() == (1, 1)
   var
     b10_p_b11{.noInit.}, b10_m_b11{.noInit.}: Fp[C]
     b20_p_b21{.noInit.}, b20_m_b21{.noInit.}: Fp[C]
@@ -2133,8 +2143,8 @@ func prod*(r: var CubicExt, a, b: CubicExt) =
   ## Out-of-place multiplication
   when CubicExt.C.has_large_field_elem():
     r.prodImpl(a, b)
-  elif r is Fp6 and CubicExt.C.has_P_3mod8_primeModulus():
-    r.prodImpl_fp6o2_p3mod8(a, b)
+  elif r is Fp6 and CubicExt.C.getNonResidueFp() == -1 and CubicExt.C.getNonResidueFp2() == (1, 1):
+    r.prodImpl_fp6o2_complex_snr_1pi(a, b)
   else:
     var d {.noInit.}: doublePrec(typeof(r))
     d.prod2x(a, b)
diff --git a/constantine/platforms/endians.nim b/constantine/platforms/endians.nim
index ffe3c67b3..751200085 100644
--- a/constantine/platforms/endians.nim
+++ b/constantine/platforms/endians.nim
@@ -34,9 +34,9 @@ template blobFrom*(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int
     for i in 0 ..< sizeof(src):
       dst[startIdx+sizeof(src)-1-i] = toByte(src shr (i * 8))
 
-func parseFromBlob*[T: byte|char](
+func parseFromBlob*(
            dst: var SomeUnsignedInt,
-           src: openArray[T],
+           src: openArray[byte],
            cursor: var uint, endian: static Endianness) {.inline.} =
   ## Read an unsigned integer from a raw binary blob.
   ## The `cursor` represents the current index in the array and is updated
@@ -63,8 +63,8 @@ func parseFromBlob*[T: byte|char](
   dst = accum
   cursor.inc(L)
 
-func dumpRawInt*[T: byte|char](
-           dst: var openArray[T],
+func dumpRawInt*(
+           dst: var openArray[byte],
            src: SomeUnsignedInt,
            cursor: uint, endian: static Endianness) {.inline.} =
   ## Dump an integer into raw binary form
diff --git a/constantine/platforms/gpu/bindings/utils.nim b/constantine/platforms/gpu/bindings/c_abi.nim
similarity index 100%
rename from constantine/platforms/gpu/bindings/utils.nim
rename to constantine/platforms/gpu/bindings/c_abi.nim
diff --git a/constantine/platforms/gpu/bindings/llvm_abi.nim b/constantine/platforms/gpu/bindings/llvm_abi.nim
index d176ea61b..2af35b4c0 100644
--- a/constantine/platforms/gpu/bindings/llvm_abi.nim
+++ b/constantine/platforms/gpu/bindings/llvm_abi.nim
@@ -6,7 +6,7 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ./utils
+import ./c_abi
 
 {.passc: gorge("llvm-config --cflags").}
 {.passl: gorge("llvm-config --libs").}
@@ -67,7 +67,7 @@ proc getBufferSize(buf: MemoryBufferRef): csize_t {.used, importc: "LLVMGetBuffe
 
 proc dispose(msg: ErrorMessageString) {.used, importc: "LLVMDisposeErrorMessage".}
 proc getErrorMessage(err: ErrorRef): ErrorMessageString {.used, importc: "LLVMGetErrorMessage".}
- 
+
 # ############################################################
 #
 #                         Module
@@ -117,7 +117,7 @@ proc verify(module: ModuleRef, failureAction: VerifierFailureAction, msg: var LL
 # - initializeNativeTarget()
 # - initializeNativeAsmPrinter()
 # are implemented in the development header macros and aren't in the LLVM library
-# We want to only depend on the runtime for installation ease and size. 
+# We want to only depend on the runtime for installation ease and size.
 #
 # We can emulate the calls based on:
 # - /usr/include/llvm-c/Target.h
@@ -375,7 +375,7 @@ proc getTypeOf*(v: ValueRef): TypeRef {.importc: "LLVMTypeOf".}
 proc getValueName2(v: ValueRef, rLen: var csize_t): cstring {.used, importc: "LLVMGetValueName2".}
   ## Returns the name of a valeu if it exists.
   ## `rLen` stores the returned string length
-  ## 
+  ##
   ## This is not free, it requires internal hash table access
   ## The return value does not have to be freed and is a pointer an internal LLVM data structure
 
@@ -473,7 +473,7 @@ proc getInlineAsm*(
 
 # Intermediate Representation
 # ------------------------------------------------------------
-# 
+#
 # - NSW: no signed wrap, signed value cannot over- or underflow.
 # - NUW: no unsigned wrap, unsigned value cannot over- or underflow.
 
diff --git a/constantine/platforms/gpu/bindings/nvidia_abi.nim b/constantine/platforms/gpu/bindings/nvidia_abi.nim
index 3c3085bed..f3e8e0c2d 100644
--- a/constantine/platforms/gpu/bindings/nvidia_abi.nim
+++ b/constantine/platforms/gpu/bindings/nvidia_abi.nim
@@ -12,7 +12,7 @@
 #
 # ############################################################
 
-import ./utils
+import ./c_abi
 
 # ############################################################
 #
@@ -466,7 +466,7 @@ type
     CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121,        ## Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays */
     CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2 = 122,             ## 64-bit operations are supported in ::cuStreamBatchMemOp_v2 and related v2 MemOp APIs. */
     CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2 = 123,             ## ::CU_STREAM_WAIT_VALUE_NOR is supported by v2 MemOp APIs. */
-    CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124,                            ## Device supports buffer sharing with dma_buf mechanism. */ 
+    CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124,                            ## Device supports buffer sharing with dma_buf mechanism. */
     CU_DEVICE_ATTRIBUTE_MAX
 
   CUmemAttach_flags* = enum
diff --git a/constantine/platforms/gpu/nvidia.nim b/constantine/platforms/gpu/nvidia.nim
index a2b4dc7d0..674af7d93 100644
--- a/constantine/platforms/gpu/nvidia.nim
+++ b/constantine/platforms/gpu/nvidia.nim
@@ -8,7 +8,7 @@
 
 import
   ./bindings/nvidia_abi {.all.},
-  ./bindings/utils,
+  ./bindings/c_abi,
   ./llvm, ./ir,
   ./nvidia_inlineasm,
   ../primitives
@@ -41,12 +41,12 @@ export
 #
 # Unified memory is fully supported starting from Pascal GPU (GTX 1080, 2016, Compute Capability SM6.0)
 # and require Kepler at minimum.
-# 
+#
 # Cuda 9 exposes the current explicit synchronization primitives (cooperative groups) and deprecated the old ones
 # Those primitives are particularly suitable for Volta GPUs (GTX 2080, 2018, Compute Capability SM7.5)
 # and requiring.
 #
-# Furthermore Pascal GPUs predates the high demand for deep learning and cryptocurrency mining 
+# Furthermore Pascal GPUs predates the high demand for deep learning and cryptocurrency mining
 # and were widely available at an affordable price point.
 # Also given iven that it's a 7 years old architecture,
 # it is unlikely that users have an older Nvidia GPU available.
@@ -64,7 +64,7 @@ export
 template check*(status: CUresult) =
   ## Check the status code of a CUDA operation
   ## Exit program with error if failure
-  
+
   let code = status # ensure that the input expression is evaluated once only
   if code != CUDA_SUCCESS:
     writeStackTrace()
@@ -77,15 +77,15 @@ func cuModuleGetFunction*(kernel: var CUfunction, module: CUmodule, fnName: open
   cuModuleGetFunction(kernel, module, fnName[0].unsafeAddr)
 
 proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =
-  
+
   check cuInit(deviceID.uint32)
-  
+
   var devCount: int32
   check cuDeviceGetCount(devCount)
   if devCount == 0:
     echo "cudaDeviceInit error: no devices supporting CUDA"
     quit 1
-  
+
   var cuDevice: CUdevice
   check cuDeviceGet(cuDevice, deviceID)
   var name = newString(128)
@@ -99,7 +99,7 @@ proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =
   if major < 6:
     echo "Error: Device ",deviceID," is not sm_60 (Pascal generation, GTX 1080) or later"
     quit 1
-  
+
   return cuDevice
 
 # ############################################################
@@ -110,7 +110,7 @@ proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =
 
 proc tagCudaKernel(module: ModuleRef, fn: FnDef) =
   ## Tag a function as a Cuda Kernel, i.e. callable from host
-  
+
   doAssert fn.fnTy.getReturnType().isVoid(), block:
     "Kernels must not return values but function returns " & $fn.fnTy.getReturnType().getTypeKind()
 
@@ -129,10 +129,10 @@ proc setCallableCudaKernel*(module: ModuleRef, fn: FnDef) =
   ##
   ## A function named `addmod` can be found by appending _public
   ##   check cuModuleGetFunction(fnPointer, cuModule, "addmod_public")
-  
+
   let pubName = fn.fnImpl.getName() & "_public"
   let pubFn = module.addFunction(cstring(pubName), fn.fnTy)
-  
+
   let ctx = module.getContext()
   let builder = ctx.createBuilder()
   defer: builder.dispose()
@@ -160,11 +160,11 @@ proc codegenNvidiaPTX*(asy: Assembler_LLVM, sm: tuple[major, minor: int32]): str
   ## SM corresponds to the target GPU architecture Compute Capability
   ## - https://developer.nvidia.com/cuda-gpus
   ## - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
-  ## 
+  ##
   ## This requires the following function to be called beforehand:
   ## - initializePasses()
   ## - initializeFullNVPTXTarget()
-  
+
   debug: doAssert asy.backend == bkNvidiaPTX
 
   asy.module.verify(AbortProcessAction)
@@ -242,9 +242,9 @@ proc exec*[T](jitFn: CUfunction, r: var T, a, b: T) =
 
     "Most CPUs (x86-64, ARM) are little-endian, as are Nvidia GPUs, which allows naive copying of parameters.\n" &
     "Your architecture '" & $hostCPU & "' is big-endian and GPU offloading is unsupported on it."
-  
+
   # We assume that all arguments are passed by reference in the Cuda kernel, hence the need for GPU alloc.
-  
+
   var rGPU, aGPU, bGPU: CUdeviceptr
   check cuMemAlloc(rGPU, csize_t sizeof(r))
   check cuMemAlloc(aGPU, csize_t sizeof(a))
diff --git a/constantine/platforms/primitives.nim b/constantine/platforms/primitives.nim
index 6837f7ef2..d5eb47c11 100644
--- a/constantine/platforms/primitives.nim
+++ b/constantine/platforms/primitives.nim
@@ -86,17 +86,16 @@ func setZero*[N](a: var array[N, SomeNumber]){.inline.} =
   for i in 0 ..< a.len:
     a[i] = 0
 
-func copy*[T: byte|char](
+func rawCopy*(
        dst: var openArray[byte],
        dStart: SomeInteger,
-       src: openArray[T],
+       src: openArray[byte],
        sStart: SomeInteger,
        len: SomeInteger
      ) {.inline.} =
   ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
   ## Unlike the standard library, this cannot throw
   ## even a defect.
-  ## It also handles copy of char into byte arrays
   debug:
     doAssert 0 <= dStart and dStart+len <= dst.len.uint, "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len
     doAssert 0 <= sStart and sStart+len <= src.len.uint, "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len
diff --git a/constantine/platforms/views.nim b/constantine/platforms/views.nim
new file mode 100644
index 000000000..16ee38dfa
--- /dev/null
+++ b/constantine/platforms/views.nim
@@ -0,0 +1,181 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import std/macros
+
+# OpenArray type
+# ---------------------------------------------------------
+
+template toOpenArray*[T](p: ptr UncheckedArray[T], len: int): openArray[T] =
+  p.toOpenArray(0, len-1)
+
+# View type
+# ---------------------------------------------------------
+#
+# This view type is equivalent to (pointer + length)
+# like openArray. Unlike openArray it can be stored in a type
+# Or can be used for nested views like openArray[View[byte]]
+
+type View*[T] = object
+  # TODO, use `lent UncheckedArray[T]` for proper borrow-checking - https://github.com/nim-lang/Nim/issues/21674
+  data: ptr UncheckedArray[T]
+  len: int
+
+template toOpenArray*[T](v: View[T]): openArray[T] =
+  v.data.toOpenArray(0, v.len-1)
+
+# Binary blob API
+# ---------------------------------------------------------
+#
+# High-level API needs to provide functions of the form
+# - func verify[T: byte|char](pubkey: PubKey, message: openArray[T], signature: Signature)
+# - func update[T: byte|char](ctx: var Sha256Context, message: openarray[T])
+#
+# for all APIs that ingest bytes/strings including:
+# - Ciphers
+# - Signature protocols
+# - Hashing algorithms
+# - Message Authentication code
+# - Key derivation functions
+#
+# This causes the following issues:
+# - Code explosion due to monomorphization. The code for bytes and char will be duplicated needlessly.
+# - Cannot be exported to C. Generic code cannot be exported to C and so will need manual split
+# - Longer compile-times. The inner functions can be byte-only instead of using generics.
+#
+# Instead we create a `genCharAPI` macro that, given a function with openArray[byte] inputs,
+# also generates a companion overload accepting openArray[char] inputs
+
+template toOpenArrayByte[T: byte|char](oa: openArray[T]): openArray[byte] =
+  when T is byte:
+    oa
+  else:
+    oa.toOpenArrayByte(oa.low, oa.high)
+
+macro genCharAPI*(procAst: untyped): untyped =
+  ## For each openArray[byte] parameter in the input proc
+  ## generate an openArray[char] variation.
+  procAst.expectKind({nnkProcDef, nnkFuncDef})
+
+  result = newStmtList()
+  result.add procAst
+
+  var genericParams = procAst[2].copyNimTree()
+  var wrapperParams = nnkFormalParams.newTree(procAst.params[0].copyNimTree())
+  var wrapperBody = newCall(ident($procAst.name))
+
+  proc matchBytes(node: NimNode): bool =
+    node.kind == nnkBracketExpr and
+      node[0].eqIdent"openArray" and
+      node[1].eqIdent"byte"
+
+  # We do 2 passes:
+  # If a single param is openArray[byte], we instantiate a non-generic proc.
+  # - This should make for faster compile-times.
+  # - It is also necessary for `hash` and `mac`, as it seems like overloading
+  #   a concept function with an argument that matches both the generic and a concrete param
+  #   crashes. i.e. either you use full generic (with genCharAPI) or you instantiate 2 concrete procs
+
+  let countBytesParams = block:
+    var count = 0
+    for i in 1 ..< procAst.params.len:
+      if procAst.params[i][^2].matchBytes():
+        count += 1
+      elif procAst.params[i][^2].kind == nnkVarTy and procAst.params[i][^2][0].matchBytes():
+        count += 1
+    count
+
+  if countBytesParams == 0:
+    error "Using genCharAPI on an input without any openArray[byte] parameter."
+
+  if countBytesParams == 1:
+    for i in 1 ..< procAst.params.len:
+      # Unfortunately, even in typed macro, .sameType(getType(openArray[byte])) doesn't match
+      if procAst.params[i][^2].matchBytes():
+        # Handle "a, b: openArray[byte]"
+        for j in 0 ..< procAst.params[i].len - 2:
+          wrapperParams.add newIdentDefs(
+            procAst.params[i][j].copyNimTree(),
+            nnkBracketExpr.newTree(ident"openArray", ident"char"))
+          wrapperBody.add newCall(bindSym"toOpenArrayByte", procAst.params[i][j])
+      elif procAst.params[i][^2].kind == nnkVarTy and procAst.params[i][^2][0].matchBytes():
+        # Handle "a, b: var openArray[byte]"
+        for j in 0 ..< procAst.params[i].len - 2:
+          wrapperParams.add newIdentDefs(
+            procAst.params[i][j].copyNimTree(),
+            nnkVarTy.newTree(nnkBracketExpr.newTree(ident"openArray", ident"char")))
+          wrapperBody.add newCall(bindSym"toOpenArrayByte", procAst.params[i][j])
+      else:
+        wrapperParams.add procAst.params[i].copyNimTree()
+        # Handle "a, b: int"
+        for j in 0 ..< procAst.params[i].len - 2:
+          wrapperBody.add ident($procAst.params[i][j])
+
+  else:
+    if genericParams.kind == nnkEmpty:
+      genericParams = nnkGenericParams.newTree()
+
+    for i in 1 ..< procAst.params.len:
+      # Unfortunately, even in typed macro, .sameType(getType(openArray[byte])) doesn't match
+      if procAst.params[i][^2].matchBytes():
+        # Handle "a, b: openArray[byte]"
+        for j in 0 ..< procAst.params[i].len - 2:
+          let genericId = ident("API_" & $i & "_" & $j)
+          wrapperParams.add newIdentDefs(
+            procAst.params[i][j].copyNimTree(),
+            nnkBracketExpr.newTree(ident"openArray", genericId))
+          genericParams.add newIdentDefs(
+            genericId,
+            nnkInfix.newTree(ident("|"), ident("byte"), ident("char")))
+          wrapperBody.add newCall(bindSym"toOpenArrayByte", procAst.params[i][j])
+      elif procAst.params[i][^2].kind == nnkVarTy and procAst.params[i][^2][0].matchBytes():
+        for j in 0 ..< procAst.params[i].len - 2:
+          let genericId = ident("API_" & $i & "_" & $j)
+          wrapperParams.add newIdentDefs(
+            procAst.params[i][j].copyNimTree(),
+            nnkVarTy.newTree(nnkBracketExpr.newTree(bindSym"openArray", genericId)))
+          genericParams.add newIdentDefs(
+            genericId,
+            nnkInfix.newTree(ident("|"), ident("byte"), ident("char")))
+          wrapperBody.add newCall(bindSym"toOpenArrayByte", procAst.params[i][j])
+      else:
+        wrapperParams.add procAst.params[i].copyNimTree()
+        # Handle "a, b: int"
+        for j in 0 ..< procAst.params[i].len - 2:
+          wrapperBody.add ident($procAst.params[i][j])
+
+  var pragmas = nnkPragma.newTree(ident"inline")
+  let skipPragmas = ["inline", "noinline", "noInline", "exportc", "exportcpp", "extern", "cdecl", "stdcall", "dynlib", "libPrefix"]
+  for i in 0 ..< procAst.pragma.len:
+    if procAst.pragma[i].kind == nnkIdent:
+      if $procAst.pragma[i] notin skipPragmas:
+        pragmas.add procAst.pragma[i].copyNimTree()
+    else:
+      procAst.pragma[i].expectKind(nnkExprColonExpr)
+      if $procAst.pragma[i][0] notin skipPragmas:
+        pragmas.add procAst.pragma[i].copyNimTree()
+
+  let wrapper = newTree(
+    procAst.kind,             # proc or func
+    procAst[0].copyNimTree(), # name: Keep export marker if any
+    newEmptyNode(),           # term-rewriting macros
+    genericParams,
+    wrapperParams,
+    pragmas,
+    newEmptyNode(),
+    wrapperBody)
+  result.add wrapper
+
+when isMainModule:
+  expandMacros:
+
+    proc foo(x: int, a: openArray[byte]) {.genCharAPI.} =
+      discard
+
+    proc bar(x: int, a: openArray[byte], b: openArray[byte]) {.genCharAPI.} =
+      discard
\ No newline at end of file
diff --git a/constantine/signatures/bls_signatures.nim b/constantine/signatures/bls_signatures.nim
index 7d656923e..b8ef0f791 100644
--- a/constantine/signatures/bls_signatures.nim
+++ b/constantine/signatures/bls_signatures.nim
@@ -14,7 +14,8 @@ import
     ../math/constants/zoo_generators,
     ../math/config/curves,
     ../hash_to_curve/[hash_to_curve, h2c_hash_to_field],
-    ../hashes
+    ../hashes,
+    ../platforms/views
 
 # ############################################################
 #
@@ -23,7 +24,7 @@ import
 # ############################################################
 
 # This module implements generic BLS signatures
-# https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04
+# https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html
 # https://github.com/cfrg/draft-irtf-cfrg-bls-signature
 #
 # We use generic shortnames SecKey, PubKey, Sig
@@ -56,14 +57,14 @@ func derivePubkey*[Pubkey, SecKey](pubkey: var Pubkey, seckey: SecKey): bool =
   pubkey.affine(pk)
   return true
 
-func coreSign*[B1, B2, B3: byte|char, Sig, SecKey](
+func coreSign*[Sig, SecKey](
     signature: var Sig,
     secretKey: SecKey,
-    message: openarray[B1],
+    message: openArray[byte],
     H: type CryptoHash,
     k: static int,
-    augmentation: openarray[B2],
-    domainSepTag: openarray[B3]) =
+    augmentation: openArray[byte],
+    domainSepTag: openArray[byte]) {.genCharAPI.} =
   ## Computes a signature for the message from the specified secret key.
   ##
   ## Output:
@@ -81,7 +82,7 @@ func coreSign*[B1, B2, B3: byte|char, Sig, SecKey](
   ## - `augmentation`, an optional augmentation to the message. This will be prepended,
   ##   prior to hashing.
   ##   This is used for building the "message augmentation" variant of BLS signatures
-  ##   https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature-04#section-3.2
+  ##   https://www.ietf.org/archive/id/draft-irtf-cfrg-bls-signature-05.html#section-3.2
   ##   which requires `CoreSign(SK, PK || message)`
   ##   and `CoreVerify(PK, PK || message, signature)`
   ## - `message` is the message to hash
@@ -95,14 +96,14 @@ func coreSign*[B1, B2, B3: byte|char, Sig, SecKey](
 
   signature.affine(sig)
 
-func coreVerify*[B1, B2, B3: byte|char, Pubkey, Sig](
+func coreVerify*[Pubkey, Sig](
     pubkey: Pubkey,
-    message: openarray[B1],
+    message: openarray[byte],
     signature: Sig,
     H: type CryptoHash,
     k: static int,
-    augmentation: openarray[B2],
-    domainSepTag: openarray[B3]): bool =
+    augmentation: openarray[byte],
+    domainSepTag: openarray[byte]): bool {.genCharAPI.} =
   ## Check that a signature is valid
   ## for a message under the provided public key
   ## This assumes that the PublicKey and Signatures
@@ -165,8 +166,7 @@ type
     domainSepTag{.align: 64.}: array[255, byte] # Alignment to enable SIMD
     dst_len: uint8
 
-func init*[T: char|byte](
-       ctx: var BLSAggregateSigAccumulator, domainSepTag: openArray[T]) =
+func init*(ctx: var BLSAggregateSigAccumulator, domainSepTag: openArray[byte]) {.genCharAPI.} =
   ## Initializes a BLS Aggregate Signature accumulator context.
 
   type H = BLSAggregateSigAccumulator.H
@@ -176,22 +176,18 @@ func init*[T: char|byte](
   if domainSepTag.len > 255:
     var t {.noInit.}: array[H.digestSize(), byte]
     H.shortDomainSepTag(output = t, domainSepTag)
-    copy(ctx.domainSepTag, dStart = 0,
-        t, sStart = 0,
-        H.digestSize())
+    rawCopy(ctx.domainSepTag, dStart = 0, t, sStart = 0, H.digestSize())
     ctx.dst_len = uint8 H.digestSize()
   else:
-    copy(ctx.domainSepTag, dStart = 0,
-        domainSepTag, sStart = 0,
-        domainSepTag.len)
+    rawCopy(ctx.domainSepTag, dStart = 0, domainSepTag, sStart = 0, domainSepTag.len)
     ctx.dst_len = uint8 domainSepTag.len
   for i in ctx.dst_len ..< ctx.domainSepTag.len:
     ctx.domainSepTag[i] = byte 0
 
-func update*[T: char|byte, Pubkey: ECP_ShortW_Aff](
+func update*[Pubkey: ECP_ShortW_Aff](
        ctx: var BLSAggregateSigAccumulator,
        pubkey: Pubkey,
-       message: openArray[T]): bool =
+       message: openArray[byte]): bool {.genCharAPI.} =
   ## Add a (public key, message) pair
   ## to a BLS aggregate signature accumulator
   ##
@@ -224,6 +220,12 @@ func update*[T: char|byte, Pubkey: ECP_ShortW_Aff](
 
     ctx.millerAccum.update(hmsgG1_aff, pubkey)
 
+func update*[Pubkey: ECP_ShortW_Aff](
+       ctx: var BLSAggregateSigAccumulator,
+       pubkey: Pubkey,
+       message: View[byte]): bool {.inline.} =
+  ctx.update(pubkey, message.toOpenArray())
+
 func merge*(ctxDst: var BLSAggregateSigAccumulator, ctxSrc: BLSAggregateSigAccumulator): bool =
   ## Merge 2 BLS signature accumulators: ctxDst <- ctxDst + ctxSrc
   ##
@@ -318,8 +320,8 @@ type
     # 20*1 (blinding 64-bit) + 50 (Miller) + 50 (final exp) = 120
     secureBlinding{.align: 32.}: array[32, byte]
 
-func hash[DigestSize: static int, T0, T1: char|byte](
-      H: type CryptoHash, digest: var array[DigestSize, byte], input0: openArray[T0], input1: openArray[T1]) =
+func hash[DigestSize: static int](
+      H: type CryptoHash, digest: var array[DigestSize, byte], input0: openArray[byte], input1: openArray[byte]) =
 
   static: doAssert DigestSize == H.digestSize()
 
@@ -329,9 +331,9 @@ func hash[DigestSize: static int, T0, T1: char|byte](
   h.update(input1)
   h.finish(digest)
 
-func init*[T0, T1: char|byte](
-       ctx: var BLSBatchSigAccumulator, domainSepTag: openArray[T0],
-       secureRandomBytes: array[32, byte], accumSepTag: openArray[T1]) =
+func init*(
+       ctx: var BLSBatchSigAccumulator, domainSepTag: openArray[byte],
+       secureRandomBytes: array[32, byte], accumSepTag: openArray[byte]) {.genCharAPI.} =
   ## Initializes a Batch BLS Signature accumulator context.
   ##
   ## This requires cryptographically secure random bytes
@@ -352,25 +354,21 @@ func init*[T0, T1: char|byte](
   if domainSepTag.len > 255:
     var t {.noInit.}: array[H.digestSize(), byte]
     H.shortDomainSepTag(output = t, domainSepTag)
-    copy(ctx.domainSepTag, dStart = 0,
-        t, sStart = 0,
-        H.digestSize())
+    rawCopy(ctx.domainSepTag, dStart = 0, t, sStart = 0, H.digestSize())
     ctx.dst_len = uint8 H.digestSize()
   else:
-    copy(ctx.domainSepTag, dStart = 0,
-        domainSepTag, sStart = 0,
-        domainSepTag.len)
+    rawCopy(ctx.domainSepTag, dStart = 0, domainSepTag, sStart = 0, domainSepTag.len)
     ctx.dst_len = uint8 domainSepTag.len
   for i in ctx.dst_len ..< ctx.domainSepTag.len:
     ctx.domainSepTag[i] = byte 0
 
   H.hash(ctx.secureBlinding, secureRandomBytes, accumSepTag)
 
-func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff](
+func update*[Pubkey, Sig: ECP_ShortW_Aff](
        ctx: var BLSBatchSigAccumulator,
        pubkey: Pubkey,
-       message: openArray[T],
-       signature: Sig): bool =
+       message: openArray[byte],
+       signature: Sig): bool {.genCharAPI.} =
   ## Add a (public key, message, signature) triplet
   ## to a BLS signature accumulator
   ##
@@ -480,6 +478,13 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff](
     hmsgG1_aff.affine(hmsgG1_jac)
     ctx.millerAccum.update(hmsgG1_aff, pubkey)
 
+func update*[Pubkey, Sig: ECP_ShortW_Aff](
+       ctx: var BLSBatchSigAccumulator,
+       pubkey: Pubkey,
+       message: View[byte],
+       signature: Sig): bool {.inline.} =
+  ctx.update(pubkey, message.toOpenArray(), signature) # forward the View as an openArray[byte] to the main overload
+
 func merge*(ctxDst: var BLSBatchSigAccumulator, ctxSrc: BLSBatchSigAccumulator): bool =
   ## Merge 2 BLS signature accumulators: ctxDst <- ctxDst + ctxSrc
   ##
@@ -548,13 +553,13 @@ func aggregate*[T: ECP_ShortW_Aff](r: var T, points: openarray[T]) =
   accum.sum_reduce_vartime(points)
   r.affine(accum)
 
-func fastAggregateVerify*[B1, B2: byte|char, Pubkey, Sig](
+func fastAggregateVerify*[Pubkey, Sig](
     pubkeys: openArray[Pubkey],
-    message: openarray[B1],
+    message: openArray[byte],
     aggregateSignature: Sig,
     H: type CryptoHash,
     k: static int,
-    domainSepTag: openarray[B2]): bool =
+    domainSepTag: openArray[byte]): bool {.genCharAPI.} =
   ## Verify the aggregate of multiple signatures on the same message by multiple pubkeys
   ## Assumes pubkeys and sig have been checked for non-infinity and group-checked.
 
@@ -563,15 +568,19 @@ func fastAggregateVerify*[B1, B2: byte|char, Pubkey, Sig](
 
   var aggPubkey {.noinit.}: Pubkey
   aggPubkey.aggregate(pubkeys)
+
+  if bool(aggPubkey.isInf()):
+    return false
+
   aggPubkey.coreVerify(message, aggregateSignature, H, k, augmentation = "", domainSepTag)
 
-func aggregateVerify*[Msg; B: byte|char, Pubkey, Sig](
+func aggregateVerify*[Msg, Pubkey, Sig](
     pubkeys: openArray[Pubkey],
     messages: openArray[Msg],
     aggregateSignature: Sig,
     H: type CryptoHash,
     k: static int,
-    domainSepTag: openarray[B]): bool =
+    domainSepTag: openarray[byte]): bool {.genCharAPI.} =
   ## Verify the aggregated signature of multiple (pubkey, message) pairs
   ## Assumes pubkeys and the aggregated signature have been checked for non-infinity and group-checked.
   ##
@@ -598,14 +607,14 @@ func aggregateVerify*[Msg; B: byte|char, Pubkey, Sig](
 
   return accum.finalVerify(aggregateSignature)
 
-func batchVerify*[Msg; B: byte|char, Pubkey, Sig](
+func batchVerify*[Msg, Pubkey, Sig](
     pubkeys: openArray[Pubkey],
     messages: openArray[Msg],
     signatures: openArray[Sig],
     H: type CryptoHash,
     k: static int,
-    domainSepTag: openarray[B],
-    secureRandomBytes: array[32, byte]): bool =
+    domainSepTag: openarray[byte],
+    secureRandomBytes: array[32, byte]): bool {.genCharAPI.} =
   ## Verify that all (pubkey, message, signature) triplets are valid
   ##
   ## Returns false if there is at least one incorrect signature
diff --git a/constantine/zoo_exports.nim b/constantine/zoo_exports.nim
new file mode 100644
index 000000000..3f10d7ea7
--- /dev/null
+++ b/constantine/zoo_exports.nim
@@ -0,0 +1,52 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+# This module allows flexible exports of procedures.
+# 1. This allows configuring all exported names from the protocol files
+#    instead of having those in many different places.
+# 2. No extra public wrapper procs are needed, reducing function call/return overhead.
+#    i.e. if we have an inner sha256.hash function
+#         and we need an exported `ctt_sha256_hash` and we also have a `hash_to_curve` function
+#         that internally uses `sha256.hash`,
+#         the ideal outcome is for `sha256.hash` to be exported as `ctt_sha256_hash` and
+#         have `hash_to_curve` directly use that.
+# 3. Furthermore while compiling Nim only, no export markers (cdecl, dynlib, exportc) are used.
+#
+# Each prefix must be modified before importing the module to export
+
+# Exportable functions
+# ----------------------------------------------------------------------------------------------
+
+var prefix_sha256* {.compileTime.} = ""
+
+# Conditional exports
+# ----------------------------------------------------------------------------------------------
+
+import std/macros
+
+macro libPrefix*(prefix: static string, procAst: untyped): untyped =
+  if prefix == "":
+    return procAst
+  else:
+    var pragmas = procAst.pragma
+    if pragmas.kind == nnkEmpty:
+      pragmas = nnkPragma.newTree()
+
+    pragmas.add ident"cdecl"
+    pragmas.add nnkExprColonExpr.newTree(
+      ident"exportc",
+      newLit(prefix & "$1"))
+    pragmas.add nnkExprColonExpr.newTree(
+      ident"raises",
+      nnkBracket.newTree())
+
+    if appType == "lib":
+      pragmas.add ident"dynlib"
+
+    result = procAst
+    result.pragma = pragmas
diff --git a/examples_c/ethereum_bls_signatures.c b/examples_c/ethereum_bls_signatures.c
new file mode 100644
index 000000000..419e6c3db
--- /dev/null
+++ b/examples_c/ethereum_bls_signatures.c
@@ -0,0 +1,63 @@
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <constantine_ethereum_bls_signatures.h>
+
+int main(){
+
+  // Initialize the runtime. For Constantine, it populates the CPU runtime detection dispatch.
+  ctt_eth_bls_init_NimMain();
+
+  ctt_eth_bls_status status;
+
+  // Declare an example insecure non-cryptographically random non-secret key. DO NOT USE IN PRODUCTION.
+  byte raw_seckey[32] = "Security pb becomes key mgmt pb!";
+  ctt_eth_bls_seckey seckey;
+
+  status = ctt_eth_bls_deserialize_seckey(&seckey, raw_seckey);
+  if (status != cttBLS_Success) {
+    printf("Secret key deserialization failure: status %d - %s\n", status, ctt_eth_bls_status_to_string(status));
+    exit(1);
+  }
+
+  // Derive the matching public key
+  ctt_eth_bls_pubkey pubkey;
+
+  status = ctt_eth_bls_derive_pubkey(&pubkey, &seckey);
+  if (status != cttBLS_Success) {
+    printf("Public key derivation failure: status %d - %s\n", status, ctt_eth_bls_status_to_string(status));
+    exit(1);
+  }
+
+  // Sign a message
+  byte message[32];
+  ctt_eth_bls_signature sig;
+
+  ctt_eth_bls_sha256_hash(message, "Mr F was here", 13, /* clear_memory = */ 0);
+
+  status = ctt_eth_bls_sign(&sig, &seckey, message, 32);
+  if (status != cttBLS_Success) {
+    printf("Message signing failure: status %d - %s\n", status, ctt_eth_bls_status_to_string(status));
+    exit(1);
+  }
+
+  // Verify that a signature is valid for a message under the provided public key
+  status = ctt_eth_bls_verify(&pubkey, message, 32, &sig);
+  if (status != cttBLS_Success) {
+    printf("Signature verification failure: status %d - %s\n", status, ctt_eth_bls_status_to_string(status));
+    exit(1);
+  }
+
+  printf("Example BLS signature/verification protocol completed successfully\n");
+  return 0;
+}
\ No newline at end of file
diff --git a/examples_c/t_libctt_bls12_381.c b/examples_c/t_libctt_bls12_381.c
index ecd61c657..dd1969e29 100644
--- a/examples_c/t_libctt_bls12_381.c
+++ b/examples_c/t_libctt_bls12_381.c
@@ -1,21 +1,21 @@
-// Constantine
-// Copyright (c) 2018-2019    Status Research & Development GmbH
-// Copyright (c) 2020-Present Mamy André-Ratsimbazafy
-// Licensed and distributed under either of
-//   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
-//   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
-// at your option. This file may not be copied, modified, or distributed except according to those terms.
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
+ */
 
 // This is a test to ensure Constantine's modular arithmetic is consistent with GMP.
 // While not intended as a tutorial, it showcases serialization, deserialization and computation.
 
 #include <assert.h>
-#include <gmp.h>
-#include <constantine_bls12_381.h>
 #include <stdio.h>
 #include <stdlib.h>
 
-typedef unsigned char byte;
+#include <gmp.h>
+#include <constantine_bls12_381.h>
 
 // https://gmplib.org/manual/Integer-Import-and-Export.html
 const int GMP_WordLittleEndian = -1;
diff --git a/helpers/explain_bin_size.sh b/helpers/explain_bin_size.sh
new file mode 100644
index 000000000..ee544a78d
--- /dev/null
+++ b/helpers/explain_bin_size.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Explain size of ELF .o files (does not work with gcc -flto).
+nm -oS --defined-only -fposix -td "$@" |
+    sort -nk5 | awk '{print $1,$2,$3,$5}'
\ No newline at end of file
diff --git a/helpers/pararun.nim b/helpers/pararun.nim
index 663fd711d..ec44dcb06 100644
--- a/helpers/pararun.nim
+++ b/helpers/pararun.nim
@@ -7,7 +7,7 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import
-  std/[os, strutils, cpuinfo, strformat, deques, terminal],
+  std/[os, strutils, cpuinfo, strformat, deques],
   std/[asyncfutures, asyncdispatch],
   asynctools/[asyncproc, asyncpipe, asyncsync]
 
@@ -46,7 +46,7 @@ proc release(s: AsyncSemaphore) =
   if s.waiters.len > 0:
     let waiter = s.waiters.popFirst()
     waiter.complete()
-  
+
   doAssert s.slots in {0..s.max}
 
 # Task runner
@@ -55,72 +55,90 @@ proc release(s: AsyncSemaphore) =
 type WorkQueue = ref object
   sem: AsyncSemaphore
   cmdQueue: Deque[string]
-  outputQueue: AsyncQueue[tuple[cmd: string, p: AsyncProcess]]
-  lineBuf: string
+  outputQueue: AsyncQueue[tuple[cmd: string, p: AsyncProcess, output: AsyncQueue[string]]]
 
-proc releaseOnProcessExit(sem: AsyncSemaphore, p: AsyncProcess) {.async.} =
-  # TODO: addProcess callback on exit is cleaner but locks the AsyncPipe "readInto"
-  #
-  # p.processID.addProcess do (fd: AsyncFD) -> bool:
-  #   sem.release()
-  #
-  # see also: https://forum.nim-lang.org/t/5565
-  # and https://github.com/cheatfate/asynctools/issues/20
+proc monitorProcessLoop(output: AsyncQueue[string], cmd: string, id, total: int, p: AsyncProcess, sem: AsyncSemaphore) {.async.} =
+  # Ideally we want AsyncStreams but that requires chronos, which doesn't support processes/pipes
+  # Or the nimboost package that hasn't been updated since 2019. So poor man's streams.
+  template doBuffering: untyped =
+    while true:
+      buf.setLen(256)
+      let charsRead = await p.outputHandle.readInto(buf[0].addr, buf.len)
+      if charsRead > 0:
+        buf.setLen(charsRead)
+        output.putNoWait(buf)
+      else:
+        break
+
+  var buf = newString(256)
+  doBuffering()
 
+  # Despite the output being empty we might still get STILL_ACTIVE: https://github.com/cheatfate/asynctools/blob/84ced6d/asynctools/asyncproc.nim#L24
+  # Unfortunately this gives "Resource temporarily unavailable" so we use exponential backoff.
+  # See also:
+  #  - https://github.com/cheatfate/asynctools/issues/20
+  #  - https://forum.nim-lang.org/t/5565
+  #
+  # let exitCode = await p.waitForExit()
   var backoff = 8
   while p.running():
     backoff = min(backoff*2, 1024) # Exponential backoff
     await sleepAsync(backoff)
-  sem.release()
+
+  doBuffering()
+  buf.setLen(0)
+
+  let exitCode = p.peekExitCode()
+  if exitCode != 0:
+    buf.add("\n" & '='.repeat(26) & " Command exited with code " & $exitCode & " " & '='.repeat(26) & '\n')
+    buf.add("[FAIL]: '" & cmd & "' (#" & $id & "/" & $total & ")\n")
+    buf.add("[FAIL]: Command #" & $id & " exited with error " & $exitCode & '\n')
+    buf.add('='.repeat(80) & '\n')
+    output.putNoWait(buf)
+
+  # close not exported: https://github.com/cheatfate/asynctools/issues/16
+  p.inputHandle.close()
+  p.outputHandle.close()
+  p.errorHandle.close()
+
+  output.putNoWait("")
+  if exitCode == 0:
+    sem.release()
 
 proc enqueuePendingCommands(wq: WorkQueue) {.async.} =
+  var id = 0
+  let total = wq.cmdQueue.len
   while wq.cmdQueue.len > 0:
+    id += 1
+
     await wq.sem.acquire()
     let cmd = wq.cmdQueue.popFirst()
-    let p = cmd.startProcess(
-      options = {poStdErrToStdOut, poUsePath, poEvalCommand}
-    )
-    p.inputHandle.close()
+    let p = cmd.startProcess(options = {poStdErrToStdOut, poUsePath, poEvalCommand})
+
+    let bufOut = newAsyncQueue[string]()
+    asyncCheck bufOut.monitorProcessLoop(cmd, id, total, p, wq.sem)
 
-    asyncCheck wq.sem.releaseOnProcessExit(p)
-    wq.outputQueue.putNoWait((cmd, p))
+    wq.outputQueue.putNoWait((cmd, p, bufOut))
 
-proc flushCommandsOutput(wq: WorkQueue) {.async.} =
+proc flushCommandsOutput(wq: WorkQueue, total: int) {.async.} =
   var id = 0
   while true:
-    let (cmd, p) = await wq.outputQueue.get()
-    
+    id += 1
+    let (cmd, p, processOutput) = await wq.outputQueue.get()
+
     echo '\n', '='.repeat(80)
-    echo "||\n|| Running: ", cmd ,"\n||"
+    echo "||\n|| Running #", id, "/", total, ": ", cmd ,"\n||"
     echo '='.repeat(80)
-    
+
     while true:
-      let charsRead = await p.outputHandle.readInto(wq.lineBuf[0].addr, wq.lineBuf.len)
-      if charsRead == 0:
+      let output = await processOutput.get()
+      if output == "":
         break
-      let charsWritten = stdout.writeBuffer(wq.lineBuf[0].addr, charsRead)
-      doAssert charsRead == charsWritten
-    
-    # close not exported: https://github.com/cheatfate/asynctools/issues/16
-    p.outputHandle.close()
-    
-    let exitCode = p.peekExitCode()
-    if exitCode == 259:
-      echo "==== Command exited with code 259 ===="
-      echo "[SKIP]: '", cmd, "' (#", id, ")"
-      echo "==== Custom stacktrace ===="
-      writeStackTrace()
-      echo "==== Custom stacktrace ===="
-      echo "[SKIP]: Assuming process was unregistered when trying to retrieve its exit code"
-    elif exitCode != 0:
-      echo "==== Command exited with code ", exitCode, " ===="
-      echo "[FAIL]: '", cmd, "' (#", id, ")"
-      echo "==== Custom stacktrace ===="
-      writeStackTrace()
-      echo "==== Custom stacktrace ===="
-      quit "[FAIL]: Command #" & $id & " exited with error " & $exitCode, exitCode
+      stdout.write(output)
 
-    id += 1
+    let exitCode = p.peekExitCode()
+    if exitCode != 0:
+      quit exitCode
 
     if wq.cmdQueue.len == 0 and wq.outputQueue.len == 0:
       return
@@ -132,9 +150,7 @@ proc runCommands(commandFile: string, numWorkers: int) =
   let wq = WorkQueue(
     sem: AsyncSemaphore.new(numWorkers),
     cmdQueue: initDeque[string](),
-    outputQueue: newAsyncQueue[tuple[cmd: string, p: AsyncProcess]](),
-    lineBuf: newString(max(80, terminalWidth()))
-  )
+    outputQueue: newAsyncQueue[tuple[cmd: string, p: AsyncProcess, output: AsyncQueue[string]]]())
 
   # Parse the file
   # --------------
@@ -142,16 +158,17 @@ proc runCommands(commandFile: string, numWorkers: int) =
     if cmd.len == 0: continue
     wq.cmdQueue.addLast(cmd)
 
-  echo "Found ", wq.cmdQueue.len, " commands to run"
-  
+  let total = wq.cmdQueue.len
+  echo "Found ", total, " commands to run"
+
   # Run the commands
   # ----------------
   asyncCheck wq.enqueuePendingCommands()
-  waitFor wq.flushCommandsOutput()
+  waitFor wq.flushCommandsOutput(total)
 
 # Main
 # ----------------------------------------------------------------
-  
+
 proc main() =
   var commandFile: string
   var numWorkers = countProcessors()
@@ -162,7 +179,7 @@ proc main() =
 
   if paramCount() >= 1:
     commandFile = paramStr(1)
-  
+
   if paramCount() == 2:
     numWorkers = paramStr(2).parseInt()
 
diff --git a/include/constantine_bls12_381.h b/include/constantine_bls12_381.h
index a6e59b793..0861bc9ba 100644
--- a/include/constantine_bls12_381.h
+++ b/include/constantine_bls12_381.h
@@ -1,11 +1,10 @@
-/*
- * Constantine
- * Copyright (c) 2018-2019    Status Research & Development GmbH
- * Copyright (c) 2020-Present Mamy André-Ratsimbazafy
- * Licensed and distributed under either of
- *   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
- *   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
- * at your option. This file may not be copied, modified, or distributed except according to those terms.
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
  */
 #ifndef __CTT_H_BLS12381__
 #define __CTT_H_BLS12381__
diff --git a/include/constantine_ethereum_bls_signatures.h b/include/constantine_ethereum_bls_signatures.h
new file mode 100644
index 000000000..2e07a49ed
--- /dev/null
+++ b/include/constantine_ethereum_bls_signatures.h
@@ -0,0 +1,353 @@
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
+ */
+#ifndef __CTT_H_ETHEREUM_BLS_SIGNATURES__
+#define __CTT_H_ETHEREUM_BLS_SIGNATURES__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Basic Types
+// ------------------------------------------------------------------------------------------------
+
+#if defined(__SIZE_TYPE__) && defined(__PTRDIFF_TYPE__)
+typedef __SIZE_TYPE__    size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+#else
+#include <stddef.h>
+#endif
+
+#if defined(__UINT8_TYPE__) && defined(__UINT32_TYPE__) && defined(__UINT64_TYPE__)
+typedef __UINT8_TYPE__   uint8_t;
+typedef __UINT32_TYPE__  uint32_t;
+typedef __UINT64_TYPE__  uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+// https://github.com/nim-lang/Nim/blob/v1.6.12/lib/nimbase.h#L318
+#if defined(__STDC_VERSION__) && __STDC_VERSION__>=199901
+# define bool _Bool
+#else
+# define bool unsigned char
+#endif
+
+typedef uint8_t          byte;
+
+// Attributes
+// ------------------------------------------------------------------------------------------------
+
+#if defined(_MSC_VER)
+#  define ctt_pure __declspec(noalias)
+#elif defined(__GNUC__)
+#  define ctt_pure __attribute__((pure))
+#else
+#  define ctt_pure
+#endif
+
+#if defined(_MSC_VER)
+#  define align(x)  __declspec(align(x))
+#else
+#  define align(x)  __attribute__((aligned(x)))
+#endif
+
+// BLS signature types
+// ------------------------------------------------------------------------------------------------
+
+#define FIELD_BITS 381
+#define ORDER_BITS 255
+#define BYTES(bits) ((int) ((bits) + 8 - 1) / 8)
+
+struct ctt_eth_bls_fp { byte raw[BYTES(FIELD_BITS)]; };
+struct ctt_eth_bls_fp2 { struct ctt_eth_bls_fp coords[2]; };
+
+typedef struct { byte raw[BYTES(ORDER_BITS)]; } ctt_eth_bls_seckey;
+typedef struct { struct ctt_eth_bls_fp  x, y; } ctt_eth_bls_pubkey;
+typedef struct { struct ctt_eth_bls_fp2 x, y; } ctt_eth_bls_signature;
+
+typedef enum __attribute__((__packed__)) {
+    cttBLS_Success,
+    cttBLS_VerificationFailure,
+    cttBLS_InvalidEncoding,
+    cttBLS_CoordinateGreaterOrEqualThanModulus,
+    cttBLS_PointAtInfinity,
+    cttBLS_PointNotOnCurve,
+    cttBLS_PointNotInSubgroup,
+    cttBLS_ZeroSecretKey,
+    cttBLS_SecretKeyLargerThanCurveOrder,
+    cttBLS_ZeroLengthAggregation,
+    cttBLS_InconsistentLengthsOfInputs,
+} ctt_eth_bls_status;
+
+static const char* ctt_eth_bls_status_to_string(ctt_eth_bls_status status) {
+  static const char* const statuses[] = {
+    "cttBLS_Success",
+    "cttBLS_VerificationFailure",
+    "cttBLS_InvalidEncoding",
+    "cttBLS_CoordinateGreaterOrEqualThanModulus",
+    "cttBLS_PointAtInfinity",
+    "cttBLS_PointNotOnCurve",
+    "cttBLS_PointNotInSubgroup",
+    "cttBLS_ZeroSecretKey",
+    "cttBLS_SecretKeyLargerThanCurveOrder",
+    "cttBLS_ZeroLengthAggregation",
+    "cttBLS_InconsistentLengthsOfInputs",
+  };
+  size_t length = sizeof statuses / sizeof *statuses;
+  if (0 <= status && status < length) {
+    return statuses[status];
+  }
+  return "cttBLS_InvalidStatusCode";
+}
+
+// Initialization
+// ------------------------------------------------------------------------------------------------
+
+/** Initializes the library:
+ *  - detect CPU features like ADX instructions support (MULX, ADCX, ADOX)
+ */
+void ctt_eth_bls_init_NimMain(void);
+
+// SHA-256
+// ------------------------------------------------------------------------------------------------
+
+typedef struct {
+  align(64) uint32_t message_schedule[16];
+  align(64) byte     buf[64];
+            uint64_t msgLen;
+} ctt_eth_bls_sha256_context;
+
+/** Initialize or reinitialize a Sha256 context.
+ */
+void ctt_eth_bls_sha256_init(ctt_eth_bls_sha256_context* ctx);
+
+/** Append a message to a SHA256 context
+ *  for incremental SHA256 computation
+ *
+ *  Security note: the tail of your message might be stored
+ *  in an internal buffer.
+ *  if sensitive content is used, ensure that
+ *  `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
+ *  Additionally ensure that the message(s) passed were stored
+ *  in memory considered secure for your threat model.
+ *
+ *  For passwords and secret keys, you MUST NOT use raw SHA-256
+ *  use a Key Derivation Function instead (KDF)
+ */
+void ctt_eth_bls_sha256_update(ctt_eth_bls_sha256_context* ctx, const byte* message, ptrdiff_t message_len);
+
+/** Finalize a SHA256 computation and output the
+ *  message digest to the `digest` buffer.
+ *
+ *  Security note: this does not clear the internal buffer.
+ *  if sensitive content is used, use "ctx.clear()"
+ *  and also make sure that the message(s) passed were stored
+ *  in memory considered secure for your threat model.
+ *
+ *  For passwords and secret keys, you MUST NOT use raw SHA-256
+ *  use a Key Derivation Function instead (KDF)
+ */
+void ctt_eth_bls_sha256_finish(ctt_eth_bls_sha256_context* ctx, byte digest[32]);
+
+/** Clear the context internal buffers
+ *  Security note:
+ *  For passwords and secret keys, you MUST NOT use raw SHA-256
+ *  use a Key Derivation Function instead (KDF)
+ */
+void ctt_eth_bls_sha256_clear(ctt_eth_bls_sha256_context* ctx);
+
+/** Compute the SHA-256 hash of message
+ *  and store the result in digest.
+ *  Optionally, clear the memory buffer used.
+ */
+void ctt_eth_bls_sha256_hash(byte digest[32], const byte* message, ptrdiff_t message_len, bool clear_memory);
+
+// Comparisons
+// ------------------------------------------------------------------------------------------------
+
+ctt_pure bool ctt_eth_bls_pubkey_is_zero(const ctt_eth_bls_pubkey* pubkey);
+ctt_pure bool ctt_eth_bls_signature_is_zero(const ctt_eth_bls_signature* sig);
+
+ctt_pure bool ctt_eth_bls_pubkeys_are_equal(const ctt_eth_bls_pubkey* a,
+                                            const ctt_eth_bls_pubkey* b);
+ctt_pure bool ctt_eth_bls_signatures_are_equal(const ctt_eth_bls_signature* a,
+                                               const ctt_eth_bls_signature* b);
+
+// Input validation
+// ------------------------------------------------------------------------------------------------
+
+/** Validate the secret key.
+ *
+ *  Regarding timing attacks, this will leak timing information only if the key is invalid.
+ *  Namely, the secret key is 0 or the secret key is too large.
+ */
+ctt_pure ctt_eth_bls_status ctt_eth_bls_validate_seckey(const ctt_eth_bls_seckey* seckey);
+
+/** Validate the public key.
+ *
+ *  This is an expensive operation that can be cached.
+ */
+ctt_pure ctt_eth_bls_status ctt_eth_bls_validate_pubkey(const ctt_eth_bls_pubkey* pubkey);
+
+/** Validate the signature.
+ *
+ *  This is an expensive operation that can be cached.
+ */
+ctt_pure ctt_eth_bls_status ctt_eth_bls_validate_signature(const ctt_eth_bls_signature* sig);
+
+// Codecs
+// ------------------------------------------------------------------------------------------------
+/** Serialize a secret key
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_serialize_seckey(byte dst[32], const ctt_eth_bls_seckey* seckey);
+
+/** Serialize a public key in compressed (Zcash) format
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_serialize_pubkey_compressed(byte dst[48], const ctt_eth_bls_pubkey* pubkey);
+
+/** Serialize a signature in compressed (Zcash) format
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_serialize_signature_compressed(byte dst[96], const ctt_eth_bls_signature* sig);
+
+/** Deserialize a secret key
+ *  This also validates the secret key.
+ *
+ *  This is protected against side-channels unless your key is invalid.
+ *  In that case it will leak whether it's all zeros or larger than the curve order.
+ */
+ctt_eth_bls_status ctt_eth_bls_deserialize_seckey(ctt_eth_bls_seckey* seckey, const byte src[32]);
+
+/** Deserialize a public key in compressed (Zcash) format.
+ *  This does not validate the public key.
+ *  It is intended for cases where public keys are stored in a trusted location
+ *  and validation can be cached.
+ *
+ *  Warning ⚠:
+ *    This procedure skips the very expensive subgroup checks.
+ *    Not checking subgroup exposes a protocol to small subgroup attacks.
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_deserialize_pubkey_compressed_unchecked(ctt_eth_bls_pubkey* pubkey, const byte src[48]);
+
+/** Deserialize a public key in compressed (Zcash) format.
+ *  This also validates the public key.
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_deserialize_pubkey_compressed(ctt_eth_bls_pubkey* pubkey, const byte src[48]);
+
+/** Deserialize a signature in compressed (Zcash) format.
+ *  This does not validate the signature.
+ *  It is intended for cases where signatures are stored in a trusted location
+ *  and validation can be cached.
+ *
+ *  Warning ⚠:
+ *    This procedure skips the very expensive subgroup checks.
+ *    Not checking subgroup exposes a protocol to small subgroup attacks.
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_deserialize_signature_compressed_unchecked(ctt_eth_bls_signature* sig, const byte src[96]);
+
+/** Deserialize a signature in compressed (Zcash) format.
+ *  This also validates the signature.
+ *
+ *  Returns cttBLS_Success if successful
+ */
+ctt_eth_bls_status ctt_eth_bls_deserialize_signature_compressed(ctt_eth_bls_signature* sig, const byte src[96]);
+
+// BLS signatures
+// ------------------------------------------------------------------------------------------------
+
+/** Derive the public key matching with a secret key
+ *
+ *  Secret protection:
+ *  - A valid secret key will only leak that it is valid.
+ *  - An invalid secret key will leak whether it's all zero or larger than the curve order.
+ */
+ctt_eth_bls_status ctt_eth_bls_derive_pubkey(ctt_eth_bls_pubkey* pubkey, const ctt_eth_bls_seckey* seckey);
+
+/** Produce a signature for the message under the specified secret key
+ *  Signature is on BLS12-381 G2 (and public key on G1)
+ *
+ *  For message domain separation purpose, the tag is `BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_`
+ *
+ *  Input:
+ *  - A secret key
+ *  - A message
+ *
+ *  Output:
+ *  - `sig` is overwritten with the signature of `message` under `seckey`,
+ *    with the scheme identified by the domain separation tag above
+ *  - A status code indicating success or if the secret key is invalid.
+ *
+ *  Secret protection:
+ *  - A valid secret key will only leak that it is valid.
+ *  - An invalid secret key will leak whether it's all zero or larger than the curve order.
+ */
+ctt_eth_bls_status ctt_eth_bls_sign(ctt_eth_bls_signature* sig,
+                                    const ctt_eth_bls_seckey* seckey,
+                                    const byte* message, ptrdiff_t message_len);
+
+/** Check that a signature is valid for a message
+ *  under the provided public key.
+ *  Returns a status code: `cttBLS_Success` if the signature is valid, a failure status otherwise.
+ *
+ *  For message domain separation purpose, the tag is `BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_`
+ *
+ *  Input:
+ *  - A public key initialized by one of the key derivation or deserialization procedure.
+ *    Or validated via validate_pubkey
+ *  - A message
+ *  - A signature initialized by one of the key derivation or deserialization procedure.
+ *    Or validated via validate_signature
+ *
+ *  Output:
+ *  - a status code with verification success if signature is valid
+ *    or indicating verification failure
+ *
+ *  In particular, the public key and signature are assumed to be on curve and subgroup-checked.
+ */
+ctt_pure ctt_eth_bls_status ctt_eth_bls_verify(const ctt_eth_bls_pubkey* pubkey,
+                                               const byte* message, ptrdiff_t message_len,
+                                               const ctt_eth_bls_signature* sig);
+
+// TODO: API for pubkeys and signature aggregation. Return a bool or a status code or nothing?
+
+/** Check that a signature is valid for a message
+ *  under the aggregate of provided public keys.
+ *  Returns a status code: `cttBLS_Success` if the signature is valid, a failure status otherwise.
+ *
+ *  For message domain separation purpose, the tag is `BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_`
+ *
+ *  Input:
+ *  - Public keys initialized by one of the key derivation or deserialization procedure.
+ *    Or validated via validate_pubkey
+ *  - A message
+ *  - A signature initialized by one of the key derivation or deserialization procedure.
+ *    Or validated via validate_signature
+ *
+ *  In particular, the public keys and signature are assumed to be on curve and subgroup-checked.
+ */
+ctt_pure ctt_eth_bls_status ctt_eth_bls_fast_aggregate_verify(const ctt_eth_bls_pubkey pubkeys[], ptrdiff_t pubkeys_len,
+                                                              const byte* message, ptrdiff_t message_len,
+                                                              const ctt_eth_bls_signature* aggregate_sig);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/constantine_pasta.h b/include/constantine_pasta.h
index b007918aa..5acbff181 100644
--- a/include/constantine_pasta.h
+++ b/include/constantine_pasta.h
@@ -1,11 +1,10 @@
-/*
- * Constantine
- * Copyright (c) 2018-2019    Status Research & Development GmbH
- * Copyright (c) 2020-Present Mamy André-Ratsimbazafy
- * Licensed and distributed under either of
- *   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
- *   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
- * at your option. This file may not be copied, modified, or distributed except according to those terms.
+/** Constantine
+ *  Copyright (c) 2018-2019    Status Research & Development GmbH
+ *  Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+ *  Licensed and distributed under either of
+ *    * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+ *    * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+ *  at your option. This file may not be copied, modified, or distributed except according to those terms.
  */
 #ifndef __CTT_H_PASTA__
 #define __CTT_H_PASTA__
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 000000000..1cbdcaf01
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1,5 @@
+# Ignore everything in this directory
+*
+# Except this file and README
+!.gitignore
+!README.md
\ No newline at end of file
diff --git a/tests/gpu/hello_world_nvidia.nim b/tests/gpu/hello_world_nvidia.nim
index 4b4bfc140..702ab0554 100644
--- a/tests/gpu/hello_world_nvidia.nim
+++ b/tests/gpu/hello_world_nvidia.nim
@@ -6,7 +6,7 @@
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ../../constantine/platforms/gpu/[llvm, nvidia, bindings/utils]
+import ../../constantine/platforms/gpu/[llvm, nvidia, bindings/c_abi]
 
 # ############################################################
 #
@@ -60,8 +60,8 @@ proc nvvmIRVersion*(majorIR, minorIR, majorDbg, minorDbg: var int32): NvvmResult
 
 proc nvvmCreateProgram*(prog: var NvvmProgram): NvvmResult
 proc nvvmDestroyProgram*(prog: var NvvmProgram): NvvmResult
-proc nvvmAddModuleToProgram*(prog: NvvmProgram, buffer: openArray[byte], name: cstring): NvvmResult {.wrapOpenArrayLenType: csize_t.} 
-proc nvvmLazyAddModuleToProgram*(prog: NvvmProgram, buffer: openArray[byte], name: cstring): NvvmResult {.wrapOpenArrayLenType: csize_t.} 
+proc nvvmAddModuleToProgram*(prog: NvvmProgram, buffer: openArray[byte], name: cstring): NvvmResult {.wrapOpenArrayLenType: csize_t.}
+proc nvvmLazyAddModuleToProgram*(prog: NvvmProgram, buffer: openArray[byte], name: cstring): NvvmResult {.wrapOpenArrayLenType: csize_t.}
 proc nvvmCompileProgram*(prog: NvvmProgram; numOptions: int32; options: cstringArray): NvvmResult
 proc nvvmVerifyProgram*(prog: NvvmProgram; numOptions: int32; options: cstringArray): NvvmResult
 proc nvvmGetCompiledResultSize*(prog: NvvmProgram; bufferSizeRet: var csize_t): NvvmResult
@@ -93,7 +93,7 @@ proc getNvvmLog(prog: NvvmProgram): string {.used.} =
 
 proc ptxCodegenViaNvidiaNvvm(module: ModuleRef, sm: tuple[major, minor: int32]): string =
   ## PTX codegen via Nvidia NVVM
-  
+
   # ######################################
   # LLVM -> NNVM handover
 
@@ -120,7 +120,7 @@ proc ptxCodegenViaNvidiaNvvm(module: ModuleRef, sm: tuple[major, minor: int32]):
 
 proc ptxCodegenViaLlvmNvptx(module: ModuleRef, sm: tuple[major, minor: int32]): string =
   ## PTX codegen via LLVM NVPTX
-  
+
   module.verify(AbortProcessAction)
 
   initializeFullNVPTXTarget()
diff --git a/tests/math/t_ec_template.nim b/tests/math/t_ec_template.nim
index 3a64aac1d..91e1e73a1 100644
--- a/tests/math/t_ec_template.nim
+++ b/tests/math/t_ec_template.nim
@@ -85,10 +85,7 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen)
 proc run_EC_addition_tests*(
        ec: typedesc,
        Iters: static int,
-       moduleName: string
-     ) =
-
-
+       moduleName: string) =
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
   rng.seed(seed)
@@ -274,9 +271,7 @@ proc run_EC_addition_tests*(
 proc run_EC_mul_sanity_tests*(
        ec: typedesc,
        ItersMul: static int,
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -306,16 +301,16 @@ proc run_EC_mul_sanity_tests*(
             bool(reference.isInf())
             bool(refMinWeight.isInf())
 
-          proc refWNaf(w: static int) = # workaround staticFor symbol visibility
+          proc refWNaf(bits, w: static int) = # workaround staticFor symbol visibility
             var refWNAF = a
-            refWNAF.scalarMul_minHammingWeight_windowed_vartime(exponent, window = w)
+            refWNAF.scalarMul_minHammingWeight_windowed_vartime(BigInt[bits](), window = w)
             check: bool(refWNAF.isInf())
 
-          refWNaf(2)
-          refWNaf(3)
-          refWNaf(5)
-          refWNaf(8)
-          refWNaf(13)
+          refWNaf(bits, w = 2)
+          refWNaf(bits, w = 3)
+          refWNaf(bits, w = 5)
+          refWNaf(bits, w = 8)
+          refWNaf(bits, w = 13)
 
       test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
       test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
@@ -381,9 +376,7 @@ proc run_EC_mul_sanity_tests*(
 proc run_EC_mul_distributive_tests*(
        ec: typedesc,
        ItersMul: static int,
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -446,9 +439,7 @@ proc run_EC_mul_distributive_tests*(
 proc run_EC_mul_vs_ref_impl*(
        ec: typedesc,
        ItersMul: static int,
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -501,9 +492,7 @@ proc run_EC_mul_vs_ref_impl*(
 proc run_EC_mixed_add_impl*(
        ec: typedesc,
        Iters: static int,
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -634,8 +623,7 @@ proc run_EC_mixed_add_impl*(
 proc run_EC_subgroups_cofactors_impl*(
        ec: typedesc,
        ItersMul: static int,
-       moduleName: string
-     ) =
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -706,9 +694,7 @@ proc run_EC_subgroups_cofactors_impl*(
 proc run_EC_affine_conversion*(
        ec: typedesc,
        Iters: static int,
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -869,9 +855,7 @@ proc run_EC_conversion_failures*(
 proc run_EC_batch_add_impl*[N: static int](
        ec: typedesc,
        numPoints: array[N, int],
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -942,9 +926,7 @@ proc run_EC_batch_add_impl*[N: static int](
 proc run_EC_multi_scalar_mul_impl*[N: static int](
        ec: typedesc,
        numPoints: array[N, int],
-       moduleName: string
-     ) =
-
+       moduleName: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
diff --git a/tests/math/t_fp_tower_frobenius_template.nim b/tests/math/t_fp_tower_frobenius_template.nim
index c981171c6..60024a357 100644
--- a/tests/math/t_fp_tower_frobenius_template.nim
+++ b/tests/math/t_fp_tower_frobenius_template.nim
@@ -60,8 +60,7 @@ proc runFrobeniusTowerTests*[N](
       Iters: static int,
       TestCurves: static array[N, Curve],
       moduleName: string,
-      testSuiteDesc: string
-    ) =
+      testSuiteDesc: string) =
   # Random seed for reproducibility
   var rng: RngState
   let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -75,7 +74,6 @@ proc runFrobeniusTowerTests*[N](
           var a = rng.random_elem(Field, gen)
           var fa {.noInit.}: typeof(a)
           fa.frobenius_map(a, k = 1)
-
           a.powUnsafeExponent(Field.fieldMod(), window = 3)
           check: bool(a == fa)
 
diff --git a/tests/t_blssig_pop_on_bls12381_g2.nim b/tests/t_ethereum_bls_signatures.nim
similarity index 94%
rename from tests/t_blssig_pop_on_bls12381_g2.nim
rename to tests/t_ethereum_bls_signatures.nim
index b34d711c7..63b1e8e9a 100644
--- a/tests/t_blssig_pop_on_bls12381_g2.nim
+++ b/tests/t_ethereum_bls_signatures.nim
@@ -9,7 +9,7 @@
 import
   std/[os, unittest, strutils],
   pkg/jsony,
-  ../constantine/blssig_pop_on_bls12381_g2,
+  ../constantine/ethereum_bls_signatures,
   ../constantine/platforms/codecs,
   ../constantine/hashes
 
@@ -115,7 +115,7 @@ template testGen*(name, testData, TestType, body: untyped): untyped =
 testGen(deserialization_G1, testVector, DeserG1_test):
   var pubkey{.noInit.}: PublicKey
 
-  let status = pubkey.deserialize_public_key_compressed(testVector.input.pubkey)
+  let status = pubkey.deserialize_pubkey_compressed(testVector.input.pubkey)
   let success = status == cttBLS_Success or status == cttBLS_PointAtInfinity
 
   doAssert success == testVector.output, block:
@@ -126,7 +126,7 @@ testGen(deserialization_G1, testVector, DeserG1_test):
   if success: # Roundtrip
     var s{.noInit.}: array[48, byte]
 
-    let status2 = s.serialize_public_key_compressed(pubkey)
+    let status2 = s.serialize_pubkey_compressed(pubkey)
     doAssert status2 == cttBLS_Success
     doAssert s == testVector.input.pubkey, block:
       "\nSerialization roundtrip differs from expected \n" &
@@ -158,7 +158,7 @@ testGen(sign, testVector, Sign_test):
   var seckey{.noInit.}: SecretKey
   var sig{.noInit.}: Signature
 
-  let status = seckey.deserialize_secret_key(testVector.input.privkey)
+  let status = seckey.deserialize_seckey(testVector.input.privkey)
   if status != cttBLS_Success:
     doAssert testVector.output == default(array[96, byte])
     let status2 = sig.sign(seckey, testVector.input.message)
@@ -171,7 +171,7 @@ testGen(sign, testVector, Sign_test):
       var output{.noInit.}: Signature
       let status3 = output.deserialize_signature_compressed(testVector.output)
       doAssert status3 == cttBLS_Success
-      doAssert sig == output, block:
+      doAssert signatures_are_equal(sig, output), block:
         var sig_bytes{.noInit.}: array[96, byte]
         var roundtrip{.noInit.}: array[96, byte]
         let sb_status = sig_bytes.serialize_signature_compressed(sig)
@@ -198,7 +198,7 @@ testGen(verify, testVector, Verify_test):
     status = cttBLS_VerificationFailure
 
   block testChecks:
-    status = pubkey.deserialize_public_key_compressed(testVector.input.pubkey)
+    status = pubkey.deserialize_pubkey_compressed(testVector.input.pubkey)
     if status notin {cttBLS_Success, cttBLS_PointAtInfinity}:
       # For point at infinity, we want to make sure that "verify" itself handles them.
       break testChecks
@@ -218,7 +218,7 @@ testGen(verify, testVector, Verify_test):
   if success: # Extra codec testing
     block:
       var output{.noInit.}: array[48, byte]
-      let s = output.serialize_public_key_compressed(pubkey)
+      let s = output.serialize_pubkey_compressed(pubkey)
       doAssert s == cttBLS_Success
       doAssert output == testVector.input.pubkey
 
@@ -236,7 +236,7 @@ testGen(fast_aggregate_verify, testVector, FastAggregateVerify_test):
 
   block testChecks:
     for i in 0 ..< testVector.input.pubkeys.len:
-      status = pubkeys[i].deserialize_public_key_compressed(testVector.input.pubkeys[i])
+      status = pubkeys[i].deserialize_pubkey_compressed(testVector.input.pubkeys[i])
       if status notin {cttBLS_Success, cttBLS_PointAtInfinity}:
         # For point at infinity, we want to make sure that "verify" itself handles them.
         break testChecks
@@ -262,7 +262,7 @@ testGen(aggregate_verify, testVector, AggregateVerify_test):
 
   block testChecks:
     for i in 0 ..< testVector.input.pubkeys.len:
-      status = pubkeys[i].deserialize_public_key_compressed(testVector.input.pubkeys[i])
+      status = pubkeys[i].deserialize_pubkey_compressed(testVector.input.pubkeys[i])
       if status notin {cttBLS_Success, cttBLS_PointAtInfinity}:
         # For point at infinity, we want to make sure that "verify" itself handles them.
         break testChecks
@@ -288,7 +288,7 @@ testGen(batch_verify, testVector, BatchVerify_test):
 
   block testChecks:
     for i in 0 ..< testVector.input.pubkeys.len:
-      status = pubkeys[i].deserialize_public_key_compressed(testVector.input.pubkeys[i])
+      status = pubkeys[i].deserialize_pubkey_compressed(testVector.input.pubkeys[i])
       if status notin {cttBLS_Success, cttBLS_PointAtInfinity}:
         # For point at infinity, we want to make sure that "verify" itself handles them.
         break testChecks
diff --git a/tests/t_hash_sha256_vs_openssl.nim b/tests/t_hash_sha256_vs_openssl.nim
index 9a58a612a..4f736bc43 100644
--- a/tests/t_hash_sha256_vs_openssl.nim
+++ b/tests/t_hash_sha256_vs_openssl.nim
@@ -39,7 +39,7 @@ when not defined(windows):
         digest: ptr array[32, byte] = nil
       ): ptr array[32, byte] {.cdecl, dynlib: DLLSSLName, importc.}
 
-  # proc EVP_Q_digest[T: byte| char](
+  # proc EVP_Q_digest[T: byte|char](
   #                 ossl_libctx: pointer,
   #                 algoName: cstring,
   #                 propq: cstring,
@@ -49,7 +49,7 @@ when not defined(windows):
 
   proc SHA256_OpenSSL[T: byte|char](
         digest: var array[32, byte],
-        s: openarray[T]) =
+        s: openArray[T]) =
     discard SHA256(s, digest.addr)
     # discard EVP_Q_digest(nil, "SHA256", nil, s, digest, nil)