Skip to content

Commit 5fea831

Browse files
committed
riscv: P extension intrinsics for packed SIMD (part 1)
Implemented with inline assembly for now, using `pure, nomem, nostack` for all packed SIMD arithmetic instructions. `inlateout` is used when an instruction requires the same register for input and output; `lateout` is used for all other output registers. This commit also rearranges the shared RISC-V architecture module to improve its documentation. It also fixes the doc tests, gates the SM3/SM4 intrinsics behind rustc target features (`zksh`/`zksed`), and uses the explicit `sm3`/`sm4` instruction mnemonics instead of raw `.insn` encodings.
1 parent 3fd17e4 commit 5fea831

File tree

4 files changed

+1090
-40
lines changed

4 files changed

+1090
-40
lines changed

crates/core_arch/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
doc_cfg,
2020
tbm_target_feature,
2121
sse4a_target_feature,
22+
riscv_target_feature,
2223
arm_target_feature,
2324
cmpxchg16b_target_feature,
2425
avx512_target_feature,

crates/core_arch/src/mod.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
#[macro_use]
44
mod macros;
55

6+
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
7+
mod riscv_shared;
8+
69
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
710
mod arm_shared;
811

@@ -276,10 +279,6 @@ mod aarch64;
276279
#[doc(cfg(any(target_arch = "arm")))]
277280
mod arm;
278281

279-
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
280-
#[doc(cfg(any(target_arch = "riscv32", target_arch = "riscv64")))]
281-
mod riscv_shared;
282-
283282
#[cfg(any(target_arch = "riscv64", doc))]
284283
#[doc(cfg(any(target_arch = "riscv64")))]
285284
mod riscv64;

crates/core_arch/src/riscv_shared/mod.rs

+25-36
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
//! Shared RISC-V intrinsics
2+
mod p;
3+
4+
pub use p::*;
25

36
use crate::arch::asm;
47

@@ -602,12 +605,10 @@ pub unsafe fn hinval_gvma_all() {
602605
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
603606
/// this instruction must always be independent from the data it operates on.
604607
#[inline]
608+
#[target_feature(enable = "zksh")]
605609
pub fn sm3p0(x: u32) -> u32 {
606610
let ans: u32;
607-
unsafe {
608-
// asm!("sm3p0 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack))
609-
asm!(".insn i 0x13, 0x1, {}, {}, 0x108", out(reg) ans, in(reg) x, options(nomem, nostack))
610-
};
611+
unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
611612
ans
612613
}
613614

@@ -634,12 +635,10 @@ pub fn sm3p0(x: u32) -> u32 {
634635
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
635636
/// this instruction must always be independent from the data it operates on.
636637
#[inline]
638+
#[target_feature(enable = "zksh")]
637639
pub fn sm3p1(x: u32) -> u32 {
638640
let ans: u32;
639-
unsafe {
640-
// asm!("sm3p1 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack))
641-
asm!(".insn i 0x13, 0x1, {}, {}, 0x109", out(reg) ans, in(reg) x, options(nomem, nostack))
642-
};
641+
unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
643642
ans
644643
}
645644

@@ -674,33 +673,28 @@ pub fn sm3p1(x: u32) -> u32 {
674673
/// It can be implemented by `sm4ed` instruction like:
675674
///
676675
/// ```no_run
676+
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
677+
/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
678+
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
679+
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
677680
/// let a = x1 ^ x2 ^ x3 ^ rk;
678681
/// let c0 = sm4ed::<0>(x0, a);
679682
/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc.
680683
/// let c2 = sm4ed::<2>(c1, a);
681684
/// let c3 = sm4ed::<3>(c2, a);
682685
/// return c3; // c3 represents c[0..=3]
686+
/// # }
683687
/// ```
684688
///
685689
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
686690
/// this instruction must always be independent from the data it operates on.
691+
#[inline]
692+
#[target_feature(enable = "zksed")]
687693
pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
688694
static_assert!(BS: u8 where BS <= 3);
689695
let ans: u32;
690-
match BS {
691-
0 => unsafe {
692-
asm!(".insn r 0x33, 0, 0x18, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
693-
},
694-
1 => unsafe {
695-
asm!(".insn r 0x33, 0, 0x38, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
696-
},
697-
2 => unsafe {
698-
asm!(".insn r 0x33, 0, 0x58, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
699-
},
700-
3 => unsafe {
701-
asm!(".insn r 0x33, 0, 0x78, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
702-
},
703-
_ => unreachable!(),
696+
unsafe {
697+
asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack))
704698
};
705699
ans
706700
}
@@ -739,33 +733,28 @@ pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
739733
/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like:
740734
///
741735
/// ```no_run
736+
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
737+
/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
738+
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
739+
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
742740
/// let k = k1 ^ k2 ^ k3 ^ ck_i;
743741
/// let c0 = sm4ks::<0>(k0, k);
744742
/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc.
745743
/// let c2 = sm4ks::<2>(c1, k);
746744
/// let c3 = sm4ks::<3>(c2, k);
747745
/// return c3; // c3 represents c[0..=3]
746+
/// # }
748747
/// ```
749748
///
750749
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
751750
/// this instruction must always be independent from the data it operates on.
751+
#[inline]
752+
#[target_feature(enable = "zksed")]
752753
pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 {
753754
static_assert!(BS: u8 where BS <= 3);
754755
let ans: u32;
755-
match BS {
756-
0 => unsafe {
757-
asm!(".insn r 0x33, 0, 0x1A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
758-
},
759-
1 => unsafe {
760-
asm!(".insn r 0x33, 0, 0x3A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
761-
},
762-
2 => unsafe {
763-
asm!(".insn r 0x33, 0, 0x5A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
764-
},
765-
3 => unsafe {
766-
asm!(".insn r 0x33, 0, 0x7A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
767-
},
768-
_ => unreachable!(),
756+
unsafe {
757+
asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack))
769758
};
770759
ans
771760
}

0 commit comments

Comments
 (0)