Skip to content

Commit e6bc261

Browse files
committed
riscv: P extension intrinsics for packed SIMD (part 1)
Implemented with inline assembly for now, using the `pure, nomem, nostack` options for all packed SIMD arithmetic instructions. `inlateout` is used when an instruction requires the same register for both input and output. This commit also rearranges the shared RISC-V architecture module to improve the generated documentation, and it includes a doc test fix as well.
1 parent 3fd17e4 commit e6bc261

File tree

3 files changed

+1077
-4
lines changed

3 files changed

+1077
-4
lines changed

crates/core_arch/src/mod.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
#[macro_use]
44
mod macros;
55

6+
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
7+
mod riscv_shared;
8+
69
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
710
mod arm_shared;
811

@@ -276,10 +279,6 @@ mod aarch64;
276279
#[doc(cfg(any(target_arch = "arm")))]
277280
mod arm;
278281

279-
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
280-
#[doc(cfg(any(target_arch = "riscv32", target_arch = "riscv64")))]
281-
mod riscv_shared;
282-
283282
#[cfg(any(target_arch = "riscv64", doc))]
284283
#[doc(cfg(any(target_arch = "riscv64")))]
285284
mod riscv64;

crates/core_arch/src/riscv_shared/mod.rs

+13
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
//! Shared RISC-V intrinsics
2+
mod p;
3+
4+
pub use p::*;
25

36
use crate::arch::asm;
47

@@ -674,12 +677,17 @@ pub fn sm3p1(x: u32) -> u32 {
674677
/// It can be implemented by `sm4ed` instruction like:
675678
///
676679
/// ```no_run
680+
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
681+
/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
682+
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
683+
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
677684
/// let a = x1 ^ x2 ^ x3 ^ rk;
678685
/// let c0 = sm4ed::<0>(x0, a);
679686
/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc.
680687
/// let c2 = sm4ed::<2>(c1, a);
681688
/// let c3 = sm4ed::<3>(c2, a);
682689
/// return c3; // c3 represents c[0..=3]
690+
/// # }
683691
/// ```
684692
///
685693
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
@@ -739,12 +747,17 @@ pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
739747
/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like:
740748
///
741749
/// ```no_run
750+
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
751+
/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
752+
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
753+
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
742754
/// let k = k1 ^ k2 ^ k3 ^ ck_i;
743755
/// let c0 = sm4ks::<0>(k0, k);
744756
/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc.
745757
/// let c2 = sm4ks::<2>(c1, k);
746758
/// let c3 = sm4ks::<3>(c2, k);
747759
/// return c3; // c3 represents c[0..=3]
760+
/// # }
748761
/// ```
749762
///
750763
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of

0 commit comments

Comments
 (0)