From 4a59d4ba083a1ae4a5d24c6dff726f2508d9a245 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 14 Aug 2023 02:39:20 -0700 Subject: [PATCH 01/26] [Zv fast track] prototyping vclmul* changes --- doc/vector/insns/vclmul.adoc | 21 +++++++++------------ doc/vector/insns/vclmulh.adoc | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/doc/vector/insns/vclmul.adoc b/doc/vector/insns/vclmul.adoc index ad941888..499d7562 100644 --- a/doc/vector/insns/vclmul.adoc +++ b/doc/vector/insns/vclmul.adoc @@ -36,7 +36,8 @@ Encoding (Vector-Scalar):: ]} .... Reserved Encodings:: -* `SEW` is any value other than 64 +* `SEW` is any value other than 64 (`Zvbc`) +* `SEW` is any value other than 32 or 64 (`Zvbcb`) Arguments:: @@ -55,20 +56,16 @@ Arguments:: Description:: Produces the low half of 128-bit carry-less product. -Each 64-bit element in the `vs2` vector register is carry-less multiplied by -either each 64-bit element in `vs1` (vector-vector), or the 64-bit value +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value from integer register `rs1` (vector-scalar). The result is the least -significant 64 bits of the carry-less product. +significant SEW bits of the carry-less product. [NOTE] ==== The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the instructions in the `zvkg` extension and can have utility in other areas. -Likewise, we treat other SEW values as reserved so as not to preclude -future extensions from using this opcode with different element widths. -For example, a future extension might define an `SEW`=32 version of this instruction to enable `Zve32*` implementations to have -vector carryless multiplication instructions. 
==== Operation:: @@ -79,10 +76,10 @@ Operation:: function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = { foreach (i from vstart to vl-1) { - let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i) + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i) else zext_or_truncate_to_sew(X(vs1)); - let op2 : bits (64) = get_velem(vs2,i); - let product : bits (64) = clmul(op1,op2,SEW); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmul(op1, op2, SEW); set_velem(vd, i, product); } RETIRE_SUCCESS @@ -98,4 +95,4 @@ function clmul(x, y, width) = { -- Included in:: -<>, <>, <> +<>, <>, <>, <> diff --git a/doc/vector/insns/vclmulh.adoc b/doc/vector/insns/vclmulh.adoc index 44f125ce..b5c0acb7 100644 --- a/doc/vector/insns/vclmulh.adoc +++ b/doc/vector/insns/vclmulh.adoc @@ -36,7 +36,8 @@ Encoding (Vector-Scalar):: ]} .... Reserved Encodings:: -* `SEW` is any value other than 64 +* `SEW` is any value other than 64 (`Zvbc`) +* `SEW` is any value other than 32 or 64 (`Zvbcb`) Arguments:: @@ -52,13 +53,13 @@ Arguments:: | Vd | output | carry-less product high |=== -Description:: +Description:: Produces the high half of 128-bit carry-less product. -Each 64-bit element in the `vs2` vector register is carry-less multiplied by -either each 64-bit element in `vs1` (vector-vector), or the 64-bit value +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value from integer register `rs1` (vector-scalar). The result is the most -significant 64 bits of the carry-less product. +significant SEW bits of the carry-less product. // This instruction must always be implemented such that its execution latency does not depend // on the data being operated upon. 
@@ -69,10 +70,10 @@ Operation:: function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = { foreach (i from vstart to vl-1) { - let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i) + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i) else zext_or_truncate_to_sew(X(vs1)); - let op2 : bits (64) = get_velem(vs2, i); - let product : bits (64) = clmulh(op1, op2, SEW); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmulh(op1, op2, SEW); set_velem(vd, i, product); } RETIRE_SUCCESS @@ -89,4 +90,4 @@ function clmulh(x, y, width) = { -- Included in:: -<>, <>, <> +<>, <>, <>, <> From a1bfcfce97ab683584bb9da4dc672be511c4a5cc Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 14 Aug 2023 02:39:31 -0700 Subject: [PATCH 02/26] [Zv fast track] prototyping vg* changes --- doc/vector/insns/vghsh.adoc | 41 +++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc index cd02b0e6..bb9c97a4 100644 --- a/doc/vector/insns/vghsh.adoc +++ b/doc/vector/insns/vghsh.adoc @@ -1,13 +1,14 @@ [[insns-vghsh, Vector GHASH Add-Multiply]] -= vghsh.vv += vghsh.[vv,vs] Synopsis:: Vector Add-Multiply over GHASH Galois-Field Mnemonic:: -vghsh.vv vd, vs2, vs1 +vghsh.vv vd, vs2, vs1 + +vghsh.vs vd, vs2, vs1 -Encoding:: +Encoding (Vector-Vector):: [wavedrom, , svg] .... {reg:[ @@ -20,8 +21,25 @@ Encoding:: {bits: 6, name: '101100'}, ]} .... + +// This might be the first instruction with 3 operands and .vs +// need to find an encoding +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101100'}, +]} +.... 
+ Reserved Encodings:: -* `SEW` is any value other than 32 +* `SEW` is any value other than 32 Arguments:: @@ -41,10 +59,10 @@ Arguments:: | Vd | output | 128 | 4 | 32 | Partial-hash (Y~i+1~) |=== -Description:: +Description:: A single "iteration" of the GHASH~H~ algorithm is performed. -This instruction treats all of the inputs and outputs as 128-bit polynomials and +This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the next partial hash (Y~i+1~) by adding the current partial hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) @@ -60,7 +78,7 @@ Y~i+1~ = ((Y~i~ ^ X~i~) · H) The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) -residing at the lowest memory address. Since the bits in the bytes are reversed, +residing at the lowest memory address. Since the bits in the bytes are reversed, This instruction internally performs bit swaps within bytes to put the bits in the standard ordering (e.g., 7,6,5,4,3,2,1,0). @@ -78,7 +96,7 @@ swap bit positions and therefore do not require any logic. ==== Since the same hash subkey `H` will typically be used repeatedly on a given message, a future extension might define a vector-scalar version of this instruction where -`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. +`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. 
==== Operation:: @@ -93,11 +111,12 @@ function clause execute (VGHSH(vs2, vs1, vd)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - + foreach (i from eg_start to eg_len-1) { + let helem = if suffix == "vv" then i else 0; let Y = (get_velem(vd,EGW=128,i)); // current partial-hash let X = get_velem(vs1,EGW=128,i); // block cipher output - let H = brev8(get_velem(vs2,EGW=128,i)); // Hash subkey + let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey let Z : bits(128) = 0; @@ -122,4 +141,4 @@ function clause execute (VGHSH(vs2, vs1, vd)) = { -- Included in:: -<>, <>, <> +<>, <>, <>, <> From bc7f52746c27030133b47fafe3247a7821ac3a45 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 14 Aug 2023 04:55:27 -0700 Subject: [PATCH 03/26] Completing vghsh.vs/vgmul.vs descriptions --- doc/vector/insns/vghsh.adoc | 21 ++++++++------- doc/vector/insns/vgmul.adoc | 53 +++++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc index bb9c97a4..b487b11d 100644 --- a/doc/vector/insns/vghsh.adoc +++ b/doc/vector/insns/vghsh.adoc @@ -6,7 +6,7 @@ Vector Add-Multiply over GHASH Galois-Field Mnemonic:: vghsh.vv vd, vs2, vs1 + -vghsh.vs vd, vs2, vs1 +vghsh.vs vd, rs2, vs1 Encoding (Vector-Vector):: [wavedrom, , svg] @@ -40,6 +40,7 @@ Encoding (Vector-Scalar):: Reserved Encodings:: * `SEW` is any value other than 32 +* `vghsh.vs` encoding (except if `Zvkgb` is enabled) Arguments:: @@ -62,7 +63,15 @@ Arguments:: Description:: A single "iteration" of the GHASH~H~ algorithm is performed. -This instruction treats all of the inputs and outputs as 128-bit polynomials and + +The previous partial hashes are read as 4-element groups from 'vd', +the cipher texts are read as 4-element groups from `vs1` + and the hash subkeys are read from either the corresponding 4-element group +in `vs2` (vector-vector form) or the scalar element group in `vs2` +(vector-scalar form, `Zvkgb` only). 
The resulting partial hashes are written as 4-element groups into `vd`. + + +This instruction treats all of the input and output element groups as 128-bit polynomials and performs operations over GF[2]. It produces the next partial hash (Y~i+1~) by adding the current partial hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) @@ -92,17 +101,11 @@ with the NIST specification. These reversals are inexpensive to implement as the swap bit positions and therefore do not require any logic. ==== -[NOTE] -==== -Since the same hash subkey `H` will typically be used repeatedly on a given message, -a future extension might define a vector-scalar version of this instruction where -`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. -==== Operation:: [source,pseudocode] -- -function clause execute (VGHSH(vs2, vs1, vd)) = { +function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { // operands are input with bits reversed in each byte if(LMUL*VLEN < EGW) then { handle_illegal(); // illegal instruction exception diff --git a/doc/vector/insns/vgmul.adoc b/doc/vector/insns/vgmul.adoc index 0008132c..ca858010 100644 --- a/doc/vector/insns/vgmul.adoc +++ b/doc/vector/insns/vgmul.adoc @@ -7,7 +7,7 @@ Vector Multiply over GHASH Galois-Field Mnemonic:: vgmul.vv vd, vs2 -Encoding:: +Encoding (Vector-Vector):: [wavedrom, , svg] .... {reg:[ @@ -20,8 +20,25 @@ Encoding:: {bits: 6, name: '101000'}, ]} .... + + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: '10001'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101001'}, +]} +.... 
+ Reserved Encodings:: -* `SEW` is any value other than 32 +* `SEW` is any value other than 32 +* `vgmul.vs` encoding (except if `Zvkgb` is enabled) Arguments:: @@ -40,9 +57,14 @@ Arguments:: | Vd | output | 128 | 4 | 32 | Product |=== -Description:: +Description:: A GHASH~H~ multiply is performed. +The multipliers are read as 4-element groups from 'vd', + the multiplicands subkeys are read from either the corresponding 4-element group +in `vs2` (vector-vector form) or the scalar element group in `vs2` +(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`. + This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the product over GF(2^128^) of the two 128-bit inputs. @@ -67,27 +89,23 @@ with the NIST specification. These reversals are inexpensive to implement as the swap bit positions and therefore do not require any logic. ==== -[NOTE] -==== -Since the same multiplicand will typically be used repeatedly on a given message, -a future extension might define a vector-scalar version of this instruction where -`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. -==== [NOTE] ==== -This instruction is identical to `vghsh.vv` with vs1=0. +The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0. This instruction is often used in GHASH code. In some cases it is followed by an XOR to perform a multiply-add. Implementations may choose to fuse these -two instructions to improve performance on GHASH code that -doesn't use the add-multiply form of the `vghsh.vv` instruction. +two instructions to improve performance on GHASH code that +doesn't use the add-multiply form of the `vghsh.vv` instruction. + +Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0. 
==== Operation:: [source,pseudocode] -- -function clause execute (VGMUL(vs2, vs1, vd)) = { +function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { // operands are input with bits reversed in each byte if(LMUL*VLEN < EGW) then { handle_illegal(); // illegal instruction exception @@ -96,10 +114,11 @@ function clause execute (VGMUL(vs2, vs1, vd)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - + foreach (i from eg_start to eg_len-1) { + let helem = if suffix == "vv" then i else 0; let Y = brev8(get_velem(vd,EGW=128,i)); // Multiplier - let H = brev8(get_velem(vs2,EGW=128,i)); // Multiplicand + let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand let Z : bits(128) = 0; for (int bit = 0; bit < 128; bit++) { @@ -113,7 +132,7 @@ function clause execute (VGMUL(vs2, vs1, vd)) = { } - let result = brev8(Z); + let result = brev8(Z); set_velem(vd, EGW=128, i, result); } RETIRE_SUCCESS @@ -122,4 +141,4 @@ function clause execute (VGMUL(vs2, vs1, vd)) = { -- Included in:: -<>, <>, <> +<>, <>, <>, <> From 6b8eadb83afc7a36e83c6ac462bade63b024918e Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 27 Aug 2023 10:35:05 -0700 Subject: [PATCH 04/26] adding directory with vector-crypto extra skeleton --- doc/vector-extra/Makefile | 67 +++++ .../riscv-crypto-spec-vector-extra.adoc | 252 ++++++++++++++++++ .../riscv-crypto-vector-zvbc32e.adoc | 18 ++ .../riscv-crypto-vector-zvkgs.adoc | 41 +++ doc/vector-extra/vghsh-vs.adoc | 147 ++++++++++ doc/vector-extra/vgmul-vs.adoc | 144 ++++++++++ 6 files changed, 669 insertions(+) create mode 100644 doc/vector-extra/Makefile create mode 100644 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc create mode 100644 doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc create mode 100644 doc/vector-extra/riscv-crypto-vector-zvkgs.adoc create mode 100644 doc/vector-extra/vghsh-vs.adoc create mode 100644 doc/vector-extra/vgmul-vs.adoc diff --git a/doc/vector-extra/Makefile b/doc/vector-extra/Makefile new file mode 100644 index 
00000000..3dd87daa --- /dev/null +++ b/doc/vector-extra/Makefile @@ -0,0 +1,67 @@ +# Makefile for RISC-V Doc Template +# +# This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 +# International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# +# SPDX-License-Identifier: CC-BY-SA-4.0 +# +# Description: +# +# This Makefile is designed to automate the process of building and packaging +# the Doc Template for RISC-V Extensions. + +DOCKER_RUN := docker run --rm -v ${PWD}:/build -w /build \ +riscvintl/riscv-docs-base-container-image:latest +HEADER_SOURCE := riscv-crypto-spec-vector-extra.adoc +PDF_RESULT := riscv-crypto-spec-vector-extra.pdf +SPEC_COMMIT= git-commit.adoc +ASCIIDOCTOR_PDF := asciidoctor-pdf +OPTIONS := --trace \ + -a compress \ + -a mathematical-format=svg \ + -a pdf-fontsdir=resources/fonts \ + -a pdf-style=resources/themes/risc-v_spec-pdf.yml \ + -a toc \ + --failure-level=ERROR +REQUIRES := --require=asciidoctor-bibtex \ + --require=asciidoctor-diagram \ + --require=asciidoctor-mathematical + +.PHONY: all build clean build-container build-no-container + +all: build + +cp_bib: + @cp ../riscv-crypto-spec.bib ./ + +$(SPEC_COMMIT): + @git rev-parse --abbrev-ref HEAD > ${@} + @echo "@" >> ${@} + @git log --pretty=format:'%H' -n 1 >> ${@} + +build: cp_bib $(SPEC_COMMIT) + @echo "Checking if Docker is available..." + @if command -v docker &> /dev/null ; then \ + echo "Docker is available, building inside Docker container..."; \ + $(MAKE) build-container; \ + else \ + echo "Docker is not available, building without Docker..."; \ + $(MAKE) build-no-container; \ + fi + +build-container: + @echo "Starting build inside Docker container..." 
+ $(DOCKER_RUN) /bin/sh -c "$(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE)" + @echo "Build completed successfully inside Docker container." + +build-no-container: + @echo "Starting build..." + $(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE) + @echo "Build completed successfully." + +clean: + @echo "Cleaning up generated files..." + rm -f $(PDF_RESULT) + @echo "Cleanup completed." diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc new file mode 100644 index 00000000..768d7999 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -0,0 +1,252 @@ +[[riscv-doc-template]] += RISC-V Cryptography Extensions Volume II: Vector Instructions +:description: The vector cryptography extensions for the RISC-V ISA. +:company: RISC-V.org +:revdate: 08 August 2023 +:revnumber: v1.0.0 +:revremark: RC2 +:url-riscv: http://riscv.org +:doctype: book +//:doctype: report +:preface-title: Preamble +:colophon: +:appendix-caption: Appendix +:imagesdir: images +:title-logo-image: image:risc-v_logo.png[pdfwidth=3.25in,align=center] +//:page-background-image: image:draft.svg[opacity=20%] +//:title-page-background-image: none +//:back-cover-image: image:circuit.png[opacity=25%] +// Settings: +:experimental: +:reproducible: +// needs to be changed? bug discussion started +:WaveDromEditorApp: wavedrom-cli +:imagesoutdir: images +:icons: font +:lang: en +:listing-caption: Listing +:sectnums: +:toc: left +:toclevels: 4 +:source-highlighter: pygments +ifdef::backend-pdf[] +:source-highlighter: coderay +endif::[] +:data-uri: +:hide-uri-scheme: +:stem: latexmath +:footnote: +:xrefstyle: short +:bibtex-file: riscv-crypto-spec.bib +:bibtex-order: alphabetical +:bibtex-style: ieee + +//:This is the preamble. + +[colophon] += Colophon + +This document describes the Vector Cryptography extensions to the +RISC-V Instruction Set Architecture. 
+ +This document is _frozen_. +Change is extremely unlikely. A high threshold will be used, and a +change will only occur because of some truly critical issue being +identified during the public review cycle. Any other desired or needed +changes can be the subject of a follow-on new extension. +For more information, see link:http://riscv.org/spec-state[here]. + +[NOTE] +.Copyright and licensure: +This work is licensed under a +link:http://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.0 International License] + +[NOTE] +.Document Version Information: +==== +include::git-commit.adoc[] + +See link:https://github.com/riscv/riscv-crypto[github.com/riscv/riscv-crypto] +for more information. +==== + +[acknowledgments] +== Acknowledgments + +Contributors to this specification (in alphabetical order) +include: + +Alan Baum, +Barna Ibrahim, +Barry Spinney, +Ben Marshall, +Derek Atkins, +link:mailto:kdockser@tenstorrent.com[Ken Dockser] (Editor), +Markku-Juhani O. Saarinen, +Nicolas Brunie, +Richard Newell + +We are all very grateful to the many other people who have +helped to improve this specification through their comments, reviews, +feedback and questions. 
+ +// ------------------------------------------------------------ + +include::riscv-crypto-vector-introduction.adoc[] +include::riscv-crypto-vector-audience.adoc[] +include::riscv-crypto-vector-sail-specifications.adoc[] +include::riscv-crypto-vector-policies.adoc[] + +// ------------------------------------------------------------ + +include::./riscv-crypto-vector-element-groups.adoc[] +include::./riscv-crypto-vector-instruction-constraints.adoc[] +include::./riscv-crypto-vector-scalar-instructions.adoc[] +include::./riscv-crypto-vector-software-portability.adoc[] +<<< + +// ------------------------------------------------------------ + + +[[crypto_vector_extensions]] +== Extensions Overview + +The section introduces all of the extensions in the Vector Cryptography +Instruction Set Extension Specification. + +The <> and <> Vector Crypto Extensions +--and accordingly the composite extensions <> and <>-- +require a Zve64x base, +or application ("V") base Vector Extension. + +All of the other Vector Crypto Extensions can be built +on _any_ embedded (Zve*) or application ("V") base Vector Extension. + +// See <> for more details on vector element groups and the drawbacks of +// small `VLEN` values. + + +All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those +in <>, <>, <>, <> and <> but _not_ <>,<>, or <>) shall +be executed with data-independent execution latency as defined in the +link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification]. +It is important to note that the Vector Crypto instructions are independent of the +implementation of the `Zkt` extension and do not require that `Zkt` is implemented. + +This specification includes a <> extension that, when implemented, requires certain vector instructions +(including <>, <>, and <>) to be executed with data-independent execution latency. 
+ +Detection of individual cryptography extensions uses the +unified software-based RISC-V discovery method. + +[NOTE] +==== +At the time of writing, these discovery mechanisms are still a work in +progress. +==== + +include::./riscv-crypto-vector-zvbb.adoc[] +<<< +include::./riscv-crypto-vector-zvbc.adoc[] +<<< +include::./riscv-crypto-vector-zvkb.adoc[] +<<< +include::./riscv-crypto-vector-zvkg.adoc[] +<<< +include::./riscv-crypto-vector-zvkned.adoc[] +<<< +include::./riscv-crypto-vector-zvknh.adoc[] +<<< +include::./riscv-crypto-vector-zvksed.adoc[] +<<< +include::./riscv-crypto-vector-zvksh.adoc[] +<<< +include::./riscv-crypto-vector-zvkn.adoc[] +<<< +include::./riscv-crypto-vector-zvknc.adoc[] +<<< +include::./riscv-crypto-vector-zvkng.adoc[] +<<< +include::./riscv-crypto-vector-zvks.adoc[] +<<< +include::./riscv-crypto-vector-zvksc.adoc[] +<<< +include::./riscv-crypto-vector-zvksg.adoc[] +<<< +include::./riscv-crypto-vector-zvkt.adoc[] +<<< + + + +// ------------------------------------------------------------ + +[[crypto_vector_insns, reftext="Vector Cryptography Instructions"]] +== Instructions + + +include::insns/vaesdf.adoc[leveloffset=+2] +<<< +include::insns/vaesdm.adoc[leveloffset=+2] +<<< +include::insns/vaesef.adoc[leveloffset=+2] +<<< +include::insns/vaesem.adoc[leveloffset=+2] +<<< +include::insns/vaeskf1.adoc[leveloffset=+2] +<<< +include::insns/vaeskf2.adoc[leveloffset=+2] +<<< +include::insns/vaesz.adoc[leveloffset=+2] +<<< +include::insns/vandn.adoc[leveloffset=+2] +<<< +include::insns/vbrev.adoc[leveloffset=+2] +<<< +include::insns/vbrev8.adoc[leveloffset=+2] +<<< +include::insns/vclmul.adoc[leveloffset=+2] +<<< +include::insns/vclmulh.adoc[leveloffset=+2] +<<< +include::insns/vclz.adoc[leveloffset=+2] +<<< +include::insns/vcpop.adoc[leveloffset=+2] +<<< +include::insns/vctz.adoc[leveloffset=+2] +<<< +include::insns/vghsh.adoc[leveloffset=+2] +<<< +include::insns/vgmul.adoc[leveloffset=+2] +<<< +include::insns/vrev8.adoc[leveloffset=+2] +<<< 
+include::insns/vrol.adoc[leveloffset=+2] +<<< +include::insns/vror.adoc[leveloffset=+2] +<<< +include::insns/vsha2c.adoc[leveloffset=+2] +<<< +include::insns/vsha2ms.adoc[leveloffset=+2] +<<< +include::insns/vsm3c.adoc[leveloffset=+2] +<<< +include::insns/vsm3me.adoc[leveloffset=+2] +<<< +include::insns/vsm4k.adoc[leveloffset=+2] +<<< +include::insns/vsm4r.adoc[leveloffset=+2] +<<< +include::insns/vwsll.adoc[leveloffset=+2] +<<< + +[[bibliography]] +== Bibliography + +bibliography::../riscv-crypto-spec.bib[ieee] + +[[Encodings]] +== Encodings +include::./riscv-crypto-vector-inst-table.adoc[] +include::./riscv-crypto-vector-inst-table-zvbb-zvbc.adoc[] + + +include::./riscv-crypto-vector-appx-sail.adoc[] diff --git a/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc new file mode 100644 index 00000000..7bd8e84e --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc @@ -0,0 +1,18 @@ +[[zvbc,Zvbc]] +=== `Zvbc` - Vector Carryless Multiplication + +General purpose carryless multiplication instructions which are commonly used in cryptography +and hashing (e.g., Elliptic curve cryptography, GHASH, CRC). + +These instructions are only defined for `SEW`=64. + +[%autowidth] +[%header,cols="^2,4"] +|=== +|Mnemonic +|Instruction +| vclmul.[vv,vx] | <> +| vclmulh.[vv,vx] | <> + +|=== + diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc new file mode 100644 index 00000000..254e2ade --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc @@ -0,0 +1,41 @@ +[[zvkg,Zvkg]] +=== `Zvkg` - Vector GCM/GMAC + +Instructions to enable the efficient implementation of GHASH~H~ which is used in Galois/Counter Mode (GCM) and +Galois Message Authentication Code (GMAC). + +All of these instructions work on 128-bit element groups comprised of four 32-bit elements. 
+ +GHASH~H~ is defined in the +// link:https://csrc.nist.gov/publications/detail/sp/800-38d/final[NIST Special Publication 800-38D] + "Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC" + cite:[nist:gcm] +(NIST Specification). + +[NOTE] +==== +GCM is used in conjunction with block ciphers (e.g., AES and SM4) to encrypt a message and +provide authentication. +GMAC is used to provide authentication of a message without encryption. +==== + +To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing. + +The number of element groups to be processed is `vl`/`EGS`. +`vl` must be set to the number of `SEW=32` elements to be processed and +therefore must be a multiple of `EGS=4`. + +Likewise, `vstart` must be a multiple of `EGS=4`. + +[%autowidth] +[%header,cols="^2,4,4,4"] +|=== + +|SEW +|EGW +|Mnemonic +|Instruction +| 32 | 128 | vghsh.vv | <> +| 32 | 128 | vgmul.vv | <> + +|=== + diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc new file mode 100644 index 00000000..b487b11d --- /dev/null +++ b/doc/vector-extra/vghsh-vs.adoc @@ -0,0 +1,147 @@ +[[insns-vghsh, Vector GHASH Add-Multiply]] += vghsh.[vv,vs] + +Synopsis:: +Vector Add-Multiply over GHASH Galois-Field + +Mnemonic:: +vghsh.vv vd, vs2, vs1 + +vghsh.vs vd, rs2, vs1 + +Encoding (Vector-Vector):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101100'}, +]} +.... + +// This might be the first instruction with 3 operands and .vs +// need to find an encoding +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101100'}, +]} +.... 
+ +Reserved Encodings:: +* `SEW` is any value other than 32 +* `vghsh.vs` encoding (except if `Zvkgb` is enabled) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2,2,2,2"] +|=== +|Register +|Direction +|EGW +|EGS +|SEW +|Definition + +| Vd | input | 128 | 4 | 32 | Partial hash (Y~i~) +| Vs1 | input | 128 | 4 | 32 | Cipher text (X~i~) +| Vs2 | input | 128 | 4 | 32 | Hash Subkey (H) +| Vd | output | 128 | 4 | 32 | Partial-hash (Y~i+1~) +|=== + +Description:: +A single "iteration" of the GHASH~H~ algorithm is performed. + + +The previous partial hashes are read as 4-element groups from `vd`, +the cipher texts are read as 4-element groups from `vs1` + and the hash subkeys are read from either the corresponding 4-element group +in `vs2` (vector-vector form) or the scalar element group in `vs2` +(vector-scalar form, `Zvkgb` only). The resulting partial hashes are written as 4-element groups into `vd`. + + +This instruction treats all of the input and output element groups as 128-bit polynomials and +performs operations over GF[2]. +It produces the next partial hash (Y~i+1~) by adding the current partial +hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) +this sum by the Hash Subkey (H). + +The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials +modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1). + +The operation can be compactly defined as +// Y~i+1~ = (Y~i~ · H) ^ X~i~ +Y~i+1~ = ((Y~i~ ^ X~i~) · H) + +The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ +for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of +byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) +residing at the lowest memory address. 
Since the bits in the bytes are reversed, +This instruction internally performs bit swaps within bytes to put the bits in the standard ordering +(e.g., 7,6,5,4,3,2,1,0). + +This instruction must always be implemented such that its execution latency does not depend +on the data being operated upon. + +[NOTE] +==== +We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent +with the NIST specification. These reversals are inexpensive to implement as they unconditionally +swap bit positions and therefore do not require any logic. +==== + + +Operation:: +[source,pseudocode] +-- +function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { + // operands are input with bits reversed in each byte + if(LMUL*VLEN < EGW) then { + handle_illegal(); // illegal instruction exception + RETIRE_FAIL + } else { + + eg_len = (vl/EGS) + eg_start = (vstart/EGS) + + foreach (i from eg_start to eg_len-1) { + let helem = if suffix == "vv" then i else 0; + let Y = (get_velem(vd,EGW=128,i)); // current partial-hash + let X = get_velem(vs1,EGW=128,i); // block cipher output + let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey + + let Z : bits(128) = 0; + + let S = brev8(Y ^ X); + + for (int bit = 0; bit < 128; bit++) { + if bit_to_bool(S[bit]) + Z ^= H + + bool reduce = bit_to_bool(H[127]); + H = H << 1; // left shift H by 1 + if (reduce) + H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + + let result = brev8(Z); // bit reverse bytes to get back to GCM standard ordering + set_velem(vd, EGW=128, i, result); + } + RETIRE_SUCCESS + } +} +-- + +Included in:: +<>, <>, <>, <> diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/vgmul-vs.adoc new file mode 100644 index 00000000..ca858010 --- /dev/null +++ b/doc/vector-extra/vgmul-vs.adoc @@ -0,0 +1,144 @@ +[[insns-vgmul, Vector GHASH Multiply]] += vgmul.vv + +Synopsis:: +Vector Multiply over GHASH Galois-Field + +Mnemonic:: +vgmul.vv vd, vs2 + +Encoding (Vector-Vector):: 
+[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: '10001'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101000'}, +]} +.... + + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: '10001'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101001'}, +]} +.... + +Reserved Encodings:: +* `SEW` is any value other than 32 +* `vgmul.vs` encoding (except if `Zvkgb` is enabled) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2,2,2,2"] +|=== +|Register +|Direction +|EGW +|EGS +|SEW +|Definition + +| Vd | input | 128 | 4 | 32 | Multiplier +| Vs2 | input | 128 | 4 | 32 | Multiplicand +| Vd | output | 128 | 4 | 32 | Product +|=== + +Description:: +A GHASH~H~ multiply is performed. + +The multipliers are read as 4-element groups from `vd`, + and the multiplicands are read from either the corresponding 4-element group +in `vs2` (vector-vector form) or the scalar element group in `vs2` +(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`. + +This instruction treats all of the inputs and outputs as 128-bit polynomials and +performs operations over GF[2]. +It produces the product over GF(2^128^) of the two 128-bit inputs. + +The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials +modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1). + +The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ +for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of +byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) +residing at the lowest memory address. 
Since the bits in the bytes are reversed, +This instruction internally performs bit swaps within bytes to put the bits in the standard ordering +(e.g., 7,6,5,4,3,2,1,0). + +This instruction must always be implemented such that its execution latency does not depend +on the data being operated upon. + +[NOTE] +==== +We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent +with the NIST specification. These reversals are inexpensive to implement as they unconditionally +swap bit positions and therefore do not require any logic. +==== + + +[NOTE] +==== +The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0. +This instruction is often used in GHASH code. In some cases it is followed +by an XOR to perform a multiply-add. Implementations may choose to fuse these +two instructions to improve performance on GHASH code that +doesn't use the add-multiply form of the `vghsh.vv` instruction. + +Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0. 
+==== + + +Operation:: +[source,pseudocode] +-- +function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { + // operands are input with bits reversed in each byte + if(LMUL*VLEN < EGW) then { + handle_illegal(); // illegal instruction exception + RETIRE_FAIL + } else { + + eg_len = (vl/EGS) + eg_start = (vstart/EGS) + + foreach (i from eg_start to eg_len-1) { + let helem = if suffix == "vv" then i else 0; + let Y = brev8(get_velem(vd,EGW=128,i)); // Multiplier + let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand + let Z : bits(128) = 0; + + for (int bit = 0; bit < 128; bit++) { + if bit_to_bool(Y[bit]) + Z ^= H + + bool reduce = bit_to_bool(H[127]); + H = H << 1; // left shift H by 1 + if (reduce) + H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + + + let result = brev8(Z); + set_velem(vd, EGW=128, i, result); + } + RETIRE_SUCCESS + } +} +-- + +Included in:: +<>, <>, <>, <> From c10f7456b43199b55dd65113cd7a798e96459a05 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 27 Aug 2023 10:35:37 -0700 Subject: [PATCH 05/26] Revert "Completing vghsh.vs/vgmul.vs descriptions" This reverts commit bc7f52746c27030133b47fafe3247a7821ac3a45. --- doc/vector/insns/vghsh.adoc | 21 +++++++-------- doc/vector/insns/vgmul.adoc | 53 ++++++++++++------------------------- 2 files changed, 26 insertions(+), 48 deletions(-) diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc index b487b11d..bb9c97a4 100644 --- a/doc/vector/insns/vghsh.adoc +++ b/doc/vector/insns/vghsh.adoc @@ -6,7 +6,7 @@ Vector Add-Multiply over GHASH Galois-Field Mnemonic:: vghsh.vv vd, vs2, vs1 + -vghsh.vs vd, rs2, vs1 +vghsh.vs vd, vs2, vs1 Encoding (Vector-Vector):: [wavedrom, , svg] @@ -40,7 +40,6 @@ Encoding (Vector-Scalar):: Reserved Encodings:: * `SEW` is any value other than 32 -* `vghsh.vs` encoding (except if `Zvkgb` is enabled) Arguments:: @@ -63,15 +62,7 @@ Arguments:: Description:: A single "iteration" of the GHASH~H~ algorithm is performed. 
- -The previous partial hashes are read as 4-element groups from 'vd', -the cipher texts are read as 4-element groups from `vs1` - and the hash subkeys are read from either the corresponding 4-element group -in `vs2` (vector-vector form) or the scalar element group in `vs2` -(vector-scalar form, `Zvkgb` only). The resulting partial hashes are writen as 4-element groups into `vd`. - - -This instruction treats all of the input and output element groups as 128-bit polynomials and +This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the next partial hash (Y~i+1~) by adding the current partial hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) @@ -101,11 +92,17 @@ with the NIST specification. These reversals are inexpensive to implement as the swap bit positions and therefore do not require any logic. ==== +[NOTE] +==== +Since the same hash subkey `H` will typically be used repeatedly on a given message, +a future extension might define a vector-scalar version of this instruction where +`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. +==== Operation:: [source,pseudocode] -- -function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { +function clause execute (VGHSH(vs2, vs1, vd)) = { // operands are input with bits reversed in each byte if(LMUL*VLEN < EGW) then { handle_illegal(); // illegal instruction exception diff --git a/doc/vector/insns/vgmul.adoc b/doc/vector/insns/vgmul.adoc index ca858010..0008132c 100644 --- a/doc/vector/insns/vgmul.adoc +++ b/doc/vector/insns/vgmul.adoc @@ -7,7 +7,7 @@ Vector Multiply over GHASH Galois-Field Mnemonic:: vgmul.vv vd, vs2 -Encoding (Vector-Vector):: +Encoding:: [wavedrom, , svg] .... {reg:[ @@ -20,25 +20,8 @@ Encoding (Vector-Vector):: {bits: 6, name: '101000'}, ]} .... - - -Encoding (Vector-Scalar):: -[wavedrom, , svg] -.... 
-{reg:[ -{bits: 7, name: 'OP-P'}, -{bits: 5, name: 'vd'}, -{bits: 3, name: 'OPMVV'}, -{bits: 5, name: '10001'}, -{bits: 5, name: 'vs2'}, -{bits: 1, name: '1'}, -{bits: 6, name: '101001'}, -]} -.... - Reserved Encodings:: -* `SEW` is any value other than 32 -* `vgmul.vs` encoding (except if `Zvkgb` is enabled) +* `SEW` is any value other than 32 Arguments:: @@ -57,14 +40,9 @@ Arguments:: | Vd | output | 128 | 4 | 32 | Product |=== -Description:: +Description:: A GHASH~H~ multiply is performed. -The multipliers are read as 4-element groups from 'vd', - the multiplicands subkeys are read from either the corresponding 4-element group -in `vs2` (vector-vector form) or the scalar element group in `vs2` -(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`. - This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the product over GF(2^128^) of the two 128-bit inputs. @@ -89,23 +67,27 @@ with the NIST specification. These reversals are inexpensive to implement as the swap bit positions and therefore do not require any logic. ==== +[NOTE] +==== +Since the same multiplicand will typically be used repeatedly on a given message, +a future extension might define a vector-scalar version of this instruction where +`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. +==== [NOTE] ==== -The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0. +This instruction is identical to `vghsh.vv` with vs1=0. This instruction is often used in GHASH code. In some cases it is followed by an XOR to perform a multiply-add. Implementations may choose to fuse these -two instructions to improve performance on GHASH code that -doesn't use the add-multiply form of the `vghsh.vv` instruction. - -Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0. 
+two instructions to improve performance on GHASH code that +doesn't use the add-multiply form of the `vghsh.vv` instruction. ==== Operation:: [source,pseudocode] -- -function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { +function clause execute (VGMUL(vs2, vs1, vd)) = { // operands are input with bits reversed in each byte if(LMUL*VLEN < EGW) then { handle_illegal(); // illegal instruction exception @@ -114,11 +96,10 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - + foreach (i from eg_start to eg_len-1) { - let helem = if suffix == "vv" then i else 0; let Y = brev8(get_velem(vd,EGW=128,i)); // Multiplier - let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand + let H = brev8(get_velem(vs2,EGW=128,i)); // Multiplicand let Z : bits(128) = 0; for (int bit = 0; bit < 128; bit++) { @@ -132,7 +113,7 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { } - let result = brev8(Z); + let result = brev8(Z); set_velem(vd, EGW=128, i, result); } RETIRE_SUCCESS @@ -141,4 +122,4 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { -- Included in:: -<>, <>, <>, <> +<>, <>, <> From 4e70f70c60fa247529e282f2a59d505ddd9dd7a7 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 27 Aug 2023 10:35:40 -0700 Subject: [PATCH 06/26] Revert "[Zv fast track] prototyping vg* changes" This reverts commit a1bfcfce97ab683584bb9da4dc672be511c4a5cc. 
--- doc/vector/insns/vghsh.adoc | 41 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc index bb9c97a4..cd02b0e6 100644 --- a/doc/vector/insns/vghsh.adoc +++ b/doc/vector/insns/vghsh.adoc @@ -1,14 +1,13 @@ [[insns-vghsh, Vector GHASH Add-Multiply]] -= vghsh.[vv,vs] += vghsh.vv Synopsis:: Vector Add-Multiply over GHASH Galois-Field Mnemonic:: -vghsh.vv vd, vs2, vs1 + -vghsh.vs vd, vs2, vs1 +vghsh.vv vd, vs2, vs1 -Encoding (Vector-Vector):: +Encoding:: [wavedrom, , svg] .... {reg:[ @@ -21,25 +20,8 @@ Encoding (Vector-Vector):: {bits: 6, name: '101100'}, ]} .... - -// This might be the first instruction with 3 operands and .vs -// need to find an encoding -Encoding (Vector-Scalar):: -[wavedrom, , svg] -.... -{reg:[ -{bits: 7, name: 'OP-P'}, -{bits: 5, name: 'vd'}, -{bits: 3, name: 'OPMVV'}, -{bits: 5, name: 'vs1'}, -{bits: 5, name: 'vs2'}, -{bits: 1, name: '1'}, -{bits: 6, name: '101100'}, -]} -.... - Reserved Encodings:: -* `SEW` is any value other than 32 +* `SEW` is any value other than 32 Arguments:: @@ -59,10 +41,10 @@ Arguments:: | Vd | output | 128 | 4 | 32 | Partial-hash (Y~i+1~) |=== -Description:: +Description:: A single "iteration" of the GHASH~H~ algorithm is performed. -This instruction treats all of the inputs and outputs as 128-bit polynomials and +This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the next partial hash (Y~i+1~) by adding the current partial hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) @@ -78,7 +60,7 @@ Y~i+1~ = ((Y~i~ ^ X~i~) · H) The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. 
This can be viewed as a collection of byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) -residing at the lowest memory address. Since the bits in the bytes are reversed, +residing at the lowest memory address. Since the bits in the bytes are reversed, This instruction internally performs bit swaps within bytes to put the bits in the standard ordering (e.g., 7,6,5,4,3,2,1,0). @@ -96,7 +78,7 @@ swap bit positions and therefore do not require any logic. ==== Since the same hash subkey `H` will typically be used repeatedly on a given message, a future extension might define a vector-scalar version of this instruction where -`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. +`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. ==== Operation:: @@ -111,12 +93,11 @@ function clause execute (VGHSH(vs2, vs1, vd)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - + foreach (i from eg_start to eg_len-1) { - let helem = if suffix == "vv" then i else 0; let Y = (get_velem(vd,EGW=128,i)); // current partial-hash let X = get_velem(vs1,EGW=128,i); // block cipher output - let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey + let H = brev8(get_velem(vs2,EGW=128,i)); // Hash subkey let Z : bits(128) = 0; @@ -141,4 +122,4 @@ function clause execute (VGHSH(vs2, vs1, vd)) = { -- Included in:: -<>, <>, <>, <> +<>, <>, <> From b0af2774d52cf796fbf4620be808c7442c963784 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 27 Aug 2023 10:35:42 -0700 Subject: [PATCH 07/26] Revert "[Zv fast track] prototyping vclmul* changes" This reverts commit 4a59d4ba083a1ae4a5d24c6dff726f2508d9a245. 
--- doc/vector/insns/vclmul.adoc | 21 ++++++++++++--------- doc/vector/insns/vclmulh.adoc | 19 +++++++++---------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/doc/vector/insns/vclmul.adoc b/doc/vector/insns/vclmul.adoc index 499d7562..ad941888 100644 --- a/doc/vector/insns/vclmul.adoc +++ b/doc/vector/insns/vclmul.adoc @@ -36,8 +36,7 @@ Encoding (Vector-Scalar):: ]} .... Reserved Encodings:: -* `SEW` is any value other than 64 (`Zvbc`) -* `SEW` is any value other than 32 or 64 (`Zvbcb`) +* `SEW` is any value other than 64 Arguments:: @@ -56,16 +55,20 @@ Arguments:: Description:: Produces the low half of 128-bit carry-less product. -Each SEW-bit element in the `vs2` vector register is carry-less multiplied by -either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +Each 64-bit element in the `vs2` vector register is carry-less multiplied by +either each 64-bit element in `vs1` (vector-vector), or the 64-bit value from integer register `rs1` (vector-scalar). The result is the least -significant SEW bits of the carry-less product. +significant 64 bits of the carry-less product. [NOTE] ==== The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the instructions in the `zvkg` extension and can have utility in other areas. +Likewise, we treat other SEW values as reserved so as not to preclude +future extensions from using this opcode with different element widths. +For example, a future extension might define an `SEW`=32 version of this instruction to enable `Zve32*` implementations to have +vector carryless multiplication instructions. 
==== Operation:: @@ -76,10 +79,10 @@ Operation:: function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = { foreach (i from vstart to vl-1) { - let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i) + let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i) else zext_or_truncate_to_sew(X(vs1)); - let op2 : bits (SEW) = get_velem(vs2, i); - let product : bits (SEW) = clmul(op1, op2, SEW); + let op2 : bits (64) = get_velem(vs2,i); + let product : bits (64) = clmul(op1,op2,SEW); set_velem(vd, i, product); } RETIRE_SUCCESS @@ -95,4 +98,4 @@ function clmul(x, y, width) = { -- Included in:: -<>, <>, <>, <> +<>, <>, <> diff --git a/doc/vector/insns/vclmulh.adoc b/doc/vector/insns/vclmulh.adoc index b5c0acb7..44f125ce 100644 --- a/doc/vector/insns/vclmulh.adoc +++ b/doc/vector/insns/vclmulh.adoc @@ -36,8 +36,7 @@ Encoding (Vector-Scalar):: ]} .... Reserved Encodings:: -* `SEW` is any value other than 64 (`Zvbcb`) -* `SEW` is any value other than 32 or 64 (`Zvbcb`) +* `SEW` is any value other than 64 Arguments:: @@ -53,13 +52,13 @@ Arguments:: | Vd | output | carry-less product high |=== -Description:: +Description:: Produces the high half of 128-bit carry-less product. -Each SEW-bit element in the `vs2` vector register is carry-less multiplied by -either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +Each 64-bit element in the `vs2` vector register is carry-less multiplied by +either each 64-bit element in `vs1` (vector-vector), or the 64-bit value from integer register `rs1` (vector-scalar). The result is the most -significant SEW bits of the carry-less product. +significant 64 bits of the carry-less product. // This instruction must always be implemented such that its execution latency does not depend // on the data being operated upon. 
@@ -70,10 +69,10 @@ Operation:: function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = { foreach (i from vstart to vl-1) { - let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i) + let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i) else zext_or_truncate_to_sew(X(vs1)); - let op2 : bits (SEW) = get_velem(vs2, i); - let product : bits (SEW) = clmulh(op1, op2, SEW); + let op2 : bits (64) = get_velem(vs2, i); + let product : bits (64) = clmulh(op1, op2, SEW); set_velem(vd, i, product); } RETIRE_SUCCESS @@ -90,4 +89,4 @@ function clmulh(x, y, width) = { -- Included in:: -<>, <>, <>, <> +<>, <>, <> From 72084cdd6edd904242c77ca8383b3ee6fba593d5 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 27 Aug 2023 10:44:05 -0700 Subject: [PATCH 08/26] refactoring Zvkgs and vghsh.vs specifications --- .../riscv-crypto-vector-zvkgs.adoc | 27 ++++-------- doc/vector-extra/vghsh-vs.adoc | 43 +++++++------------ 2 files changed, 25 insertions(+), 45 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc index 254e2ade..40787c63 100644 --- a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc +++ b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc @@ -1,23 +1,14 @@ -[[zvkg,Zvkg]] -=== `Zvkg` - Vector GCM/GMAC +[[zvkgs,Zvkgs]] +=== `Zvkgs` - Vector-Scalar GCM/GMAC -Instructions to enable the efficient implementation of GHASH~H~ which is used in Galois/Counter Mode (GCM) and -Galois Message Authentication Code (GMAC). +`Zvkgs` depends on `Zvkg`, it extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`. -All of these instructions work on 128-bit element groups comprised of four 32-bit elements. +Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and +Galois Message Authentication Code (GMAC). 
-GHASH~H~ is defined in the -// link:https://csrc.nist.gov/publications/detail/sp/800-38d/final[NIST Special Publication 800-38D] - "Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC" - cite:[nist:gcm] -(NIST Specification). +The instructions inherit the same constraints (element group size, data independent execution timing and `vl`/`vstart` multiple constraints). -[NOTE] -==== -GCM is used in conjunction with block ciphers (e.g., AES and SM4) to encrypt a message and -provide authentication. -GMAC is used to provide authentication of a message without encryption. -==== +All of these instructions work on 128-bit element groups comprised of four 32-bit elements. To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing. @@ -34,8 +25,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`. |EGW |Mnemonic |Instruction -| 32 | 128 | vghsh.vv | <> -| 32 | 128 | vgmul.vv | <> +| 32 | 128 | vghsh.vs | <> +| 32 | 128 | vgmul.vs | <> |=== diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc index b487b11d..1d531381 100644 --- a/doc/vector-extra/vghsh-vs.adoc +++ b/doc/vector-extra/vghsh-vs.adoc @@ -1,26 +1,12 @@ -[[insns-vghsh, Vector GHASH Add-Multiply]] -= vghsh.[vv,vs] +[[insns-vghsh-vs, Vector-Scalar GHASH Add-Multiply]] += vghsh.vs Synopsis:: -Vector Add-Multiply over GHASH Galois-Field +Vector-Scalar Add-Multiply over GHASH Galois-Field Mnemonic:: -vghsh.vv vd, vs2, vs1 + -vghsh.vs vd, rs2, vs1 +vghsh.vs vd, vs2, vs1 -Encoding (Vector-Vector):: -[wavedrom, , svg] -.... -{reg:[ -{bits: 7, name: 'OP-P'}, -{bits: 5, name: 'vd'}, -{bits: 3, name: 'OPMVV'}, -{bits: 5, name: 'vs1'}, -{bits: 5, name: 'vs2'}, -{bits: 1, name: '1'}, -{bits: 6, name: '101100'}, -]} -.... 
// This might be the first instruction with 3 operands and .vs // need to find an encoding @@ -40,7 +26,6 @@ Encoding (Vector-Scalar):: Reserved Encodings:: * `SEW` is any value other than 32 -* `vghsh.vs` encoding (except if `Zvkgb` is enabled) Arguments:: @@ -66,10 +51,12 @@ A single "iteration" of the GHASH~H~ algorithm is performed. The previous partial hashes are read as 4-element groups from 'vd', the cipher texts are read as 4-element groups from `vs1` - and the hash subkeys are read from either the corresponding 4-element group -in `vs2` (vector-vector form) or the scalar element group in `vs2` -(vector-scalar form, `Zvkgb` only). The resulting partial hashes are writen as 4-element groups into `vd`. + and the hash subkeys are read from the scalar element group in `vs2` +The resulting partial hashes are writen as 4-element groups into `vd`. + +// The following is copied from vghsh.vv and could be omitted +// (replaced with a link to the original specification) This instruction treats all of the input and output element groups as 128-bit polynomials and performs operations over GF[2]. @@ -105,7 +92,7 @@ swap bit positions and therefore do not require any logic. 
Operation:: [source,pseudocode] -- -function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { +function clause execute (VGHSHVS(vs2, vs1, vd)) = { // operands are input with bits reversed in each byte if(LMUL*VLEN < EGW) then { handle_illegal(); // illegal instruction exception @@ -115,11 +102,13 @@ function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) + // H is common to all element groups + let helem = 0; + let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey + foreach (i from eg_start to eg_len-1) { - let helem = if suffix == "vv" then i else 0; - let Y = (get_velem(vd,EGW=128,i)); // current partial-hash + let Y = get_velem(vd,EGW=128,i); // current partial-hash let X = get_velem(vs1,EGW=128,i); // block cipher output - let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey let Z : bits(128) = 0; @@ -144,4 +133,4 @@ function clause execute (VGHSH(vs2, vs1, vd, suffix)) = { -- Included in:: -<>, <>, <>, <> +<> From 8c5a9f255c3640ab40c537a3e9ced02a5921d02b Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:15:11 -0700 Subject: [PATCH 09/26] fixing vghsh.vs/vgmul.vs descriptions --- doc/vector-extra/vghsh-vs.adoc | 2 +- doc/vector-extra/vgmul-vs.adoc | 41 +++++++++++----------------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc index 1d531381..e1bf1c7d 100644 --- a/doc/vector-extra/vghsh-vs.adoc +++ b/doc/vector-extra/vghsh-vs.adoc @@ -51,7 +51,7 @@ A single "iteration" of the GHASH~H~ algorithm is performed. The previous partial hashes are read as 4-element groups from 'vd', the cipher texts are read as 4-element groups from `vs1` - and the hash subkeys are read from the scalar element group in `vs2` + and the hash subkeys are read from the scalar element group in `vs2`. The resulting partial hashes are writen as 4-element groups into `vd`.
diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/vgmul-vs.adoc index ca858010..1192f334 100644 --- a/doc/vector-extra/vgmul-vs.adoc +++ b/doc/vector-extra/vgmul-vs.adoc @@ -1,25 +1,11 @@ -[[insns-vgmul, Vector GHASH Multiply]] -= vgmul.vv +[[insns-vgmul-vs, Vector-Scalar GHASH Multiply]] += vgmul.vs Synopsis:: -Vector Multiply over GHASH Galois-Field +Vector-Scalar Multiply over GHASH Galois-Field Mnemonic:: -vgmul.vv vd, vs2 - -Encoding (Vector-Vector):: -[wavedrom, , svg] -.... -{reg:[ -{bits: 7, name: 'OP-P'}, -{bits: 5, name: 'vd'}, -{bits: 3, name: 'OPMVV'}, -{bits: 5, name: '10001'}, -{bits: 5, name: 'vs2'}, -{bits: 1, name: '1'}, -{bits: 6, name: '101000'}, -]} -.... +vgmul.vs vd, vs2 Encoding (Vector-Scalar):: @@ -38,7 +24,6 @@ Encoding (Vector-Scalar):: Reserved Encodings:: * `SEW` is any value other than 32 -* `vgmul.vs` encoding (except if `Zvkgb` is enabled) Arguments:: @@ -61,11 +46,10 @@ Description:: A GHASH~H~ multiply is performed. The multipliers are read as 4-element groups from 'vd', - the multiplicands subkeys are read from either the corresponding 4-element group -in `vs2` (vector-vector form) or the scalar element group in `vs2` -(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`. + the multiplicand is read from the scalar element group in `vs2`. +The resulting products are written as 4-element groups into `vd`. -This instruction treats all of the inputs and outputs as 128-bit polynomials and +This instruction treats all of the inputs and outputs as 128-bit polynomials and performs operations over GF[2]. It produces the product over GF(2^128^) of the two 128-bit inputs. @@ -92,13 +76,13 @@ swap bit positions and therefore do not require any logic. [NOTE] ==== -The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0.
+Similarly to how the instruction `vgmul.vv` is identical to `vghsh.vv` with the value +of vs1 register being 0, the instruction `vgmul.vs` is identical to `vghsh.vs` with the value of vs1 being 0. This instruction is often used in GHASH code. In some cases it is followed by an XOR to perform a multiply-add. Implementations may choose to fuse these two instructions to improve performance on GHASH code that doesn't use the add-multiply form of the `vghsh.vv` instruction. -Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0. ==== @@ -114,11 +98,12 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) + // H multiplicand is constant for all loop iterations + let helem = 0; + let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand foreach (i from eg_start to eg_len-1) { - let helem = if suffix == "vv" then i else 0; let Y = brev8(get_velem(vd,EGW=128,i)); // Multiplier - let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand let Z : bits(128) = 0; for (int bit = 0; bit < 128; bit++) { @@ -141,4 +126,4 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { -- Included in:: -<>, <>, <>, <> +<> From 056dd04083c6442f8b865fabd1d56f1c84343484 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:15:39 -0700 Subject: [PATCH 10/26] adding vclmul/vclmulh instruction specification for Zve32e --- doc/vector-extra/insns/vclmul.adoc | 104 ++++++++++++++++++++++++++++ doc/vector-extra/insns/vclmulh.adoc | 98 ++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 doc/vector-extra/insns/vclmul.adoc create mode 100644 doc/vector-extra/insns/vclmulh.adoc diff --git a/doc/vector-extra/insns/vclmul.adoc b/doc/vector-extra/insns/vclmul.adoc new file mode 100644 index 00000000..e1874bf2 --- /dev/null +++ b/doc/vector-extra/insns/vclmul.adoc @@ -0,0 +1,104 @@ +[[insns-vclmul-32e, Vector Carry-less Multiply]] += vclmul.[vv,vx] + +Synopsis:: +Vector Carry-less 
Multiply by vector or scalar - returning low half of product. + +Mnemonic:: +vclmul.vv vd, vs2, vs1, vm + +vclmul.vx vd, vs2, rs1, vm + +Encoding (Vector-Vector):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001100'}, +]} +.... + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVX'}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001100'}, +]} +.... +Reserved Encodings:: +* `SEW` is any value other than 32 (`Zvbc32e` only) +* `SEW` is any value other than 64 (`Zvbc` only) +* `SEW` is any value other than 32 or 64 (`Zvbc` and `Zvbc32e`) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2"] +|=== +|Register +|Direction +|Definition + +| Vs1/Rs1 | input | multiplier +| Vs2 | input | multiplicand +| Vd | output | carry-less product low +|=== + +[NOTE] +==== +`vclmul` instruction was initially defined in `Zvbc` with only `SEW=64-bit` support, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`. +==== + +Description:: +Produces the low half of 128-bit carry-less product. + +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +from integer register `rs1` (vector-scalar). The result is the least +significant SEW bits of the carry-less product. + +[NOTE] +==== +The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. +We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the +instructions in the `zvkg` extension and can have utility in other areas. 
+==== + +Operation:: +[source,sail] +-- + + +function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = { + + foreach (i from vstart to vl-1) { + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i) + else zext_or_truncate_to_sew(X(vs1)); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmul(op1, op2, SEW); + set_velem(vd, i, product); + } + RETIRE_SUCCESS +} + +function clmul(x, y, width) = { + let result : bits(width) = zeros(); + foreach (i from 0 to (width - 1)) { + if y[i] == 1 then result = result ^ (x << i); + } + result +} +-- + +Included in:: +<> diff --git a/doc/vector-extra/insns/vclmulh.adoc b/doc/vector-extra/insns/vclmulh.adoc new file mode 100644 index 00000000..6f536542 --- /dev/null +++ b/doc/vector-extra/insns/vclmulh.adoc @@ -0,0 +1,98 @@ +[[insns-vclmulh, Vector Carry-less Multiply Return High Half]] += vclmulh.[vv,vx] + +Synopsis:: +Vector Carry-less Multiply by vector or scalar - returning high half of product. + +Mnemonic:: +vclmulh.vv vd, vs2, vs1, vm + +vclmulh.vx vd, vs2, rs1, vm + +Encoding (Vector-Vector):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001101'}, +]} +.... + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVX'}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001101'}, +]} +.... 
+Reserved Encodings:: +* `SEW` is any value other than 64 (`Zvbc` only) +* `SEW` is any value other than 32 or 64 (`Zvbc` and `Zvbc32e`) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2"] +|=== +|Register +|Direction +|Definition + +| Vs1/Rs1 | input | multiplier +| Vs2 | input | multiplicand +| Vd | output | carry-less product high +|=== + +[NOTE] +==== +The `vclmulh` instruction was initially defined in `Zvbc` with only `SEW=64-bit` support; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`. +==== + +Description:: +Produces the high half of 128-bit carry-less product. + +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +from integer register `rs1` (vector-scalar). The result is the most +significant SEW bits of the carry-less product. + +// This instruction must always be implemented such that its execution latency does not depend +// on the data being operated upon. + +Operation:: +[source,sail] +-- +function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = { + + foreach (i from vstart to vl-1) { + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i) + else zext_or_truncate_to_sew(X(vs1)); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmulh(op1, op2, SEW); + set_velem(vd, i, product); + } + RETIRE_SUCCESS +} + +function clmulh(x, y, width) = { + let result : bits(width) = 0; + foreach (i from 1 to (width - 1)) { + if y[i] == 1 then result = result ^ (x >> (width - i)); + } + result +} + +-- + +Included in:: +<>, <>, <>, <> From 11bd8af6b317c8b2080b074702765db5e9089c7f Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:16:37 -0700 Subject: [PATCH 11/26] moving vghsh.vs/vgmul.vs spec from doc/vector-extra to doc/vector-extra/insns/ --- doc/vector-extra/{ => insns}/vghsh-vs.adoc | 0 doc/vector-extra/{ => insns}/vgmul-vs.adoc | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename doc/vector-extra/{ =>
insns}/vghsh-vs.adoc (100%) rename doc/vector-extra/{ => insns}/vgmul-vs.adoc (100%) diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/insns/vghsh-vs.adoc similarity index 100% rename from doc/vector-extra/vghsh-vs.adoc rename to doc/vector-extra/insns/vghsh-vs.adoc diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/insns/vgmul-vs.adoc similarity index 100% rename from doc/vector-extra/vgmul-vs.adoc rename to doc/vector-extra/insns/vgmul-vs.adoc From 5e836daad9214be01dae84ba7e7492f5a9b26558 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:16:54 -0700 Subject: [PATCH 12/26] adding instruction table --- .../riscv-crypto-vector-extra-inst-table.adoc | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc new file mode 100644 index 00000000..01c1bd23 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -0,0 +1,60 @@ +[appendix] +[[crypto_vector_instructions]] +=== Crypto Vector Cryptographic Instructions + +OP-P (0x77) +Crypto Vector instructions, including Zvkgs, except Zvbb and Zvbc +The new/modified encodings are in bold and underlined.
+ +// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] +[cols="4,1,1,1,1,4,1,1,1,4,1,1,1"] +|=== +5+^|Integer 4+^|Integer 4+^| FP + +| funct3 | | | | | funct3 | | | | funct3 | | | +| OPIVV |V| | | | OPMVV |V| | | OPFVV |V| | +| OPIVX | |X| | | OPMVX | |X| | OPFVF | |F| +| OPIVI | | |I| | | | | | | | | +|=== + +// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] +[cols="6,1,1,1,1,6,1,1,6,6,1,1,1"] + +// TODO to be updated with vghsh.vs and vgmul.vs encoding +|=== +5+^| funct6 4+^| funct6 4+^| funct6 + +|100000 | | | | | 100000 |V| | vsm3me | 100000 | | | +| 100001 | | | | | 100001 |V| | vsm4k.vi | 100001 | | | +| 100010 | | | | | 100010 |V| | vaesfk1.vi | 100010 | | | +| 100011 | | | | | 100011 | | | __**vghsh.vs**__ | 100011 | | | +| 100100 | | | | | 100100 | | | | 100100 | | | +| 100101 | | | | | 100101 | | | | 100101 | | | +| 100110 | | | | | 100110 | | | | 100110 | | | +| 100111 | | | | | 100111 | | | | 100111 | | | +| | | | | | | | | | | | | +| 101000 | | | | | 101000 |V| | VAES.vv | 101000 | | | +| 101001 | | | | | 101001 |V| | *VAES.vs* | 101001 | | | +| 101010 | | | | | 101010 |V| | vaesfk2.vi | 101010 | | | +| 101011 | | | | | 101011 |V| | vsm3c.vi | 101011 | | | +| 101100 | | | | | 101100 |V| | vghsh | 101100 | | | +| 101101 | | | | | 101101 |V| | vsha2ms | 101101 | | | +| 101110 | | | | | 101110 |V| | vsha2ch | 101110 | | | +| 101111 | | | | | 101111 |V| | vsha2cl | 101111 | | | +|=== + +<<< + +.VAES.vv and VAES.vs encoding space +[cols="2,14"] +|=== +|vs1| + +| 00000 | vaesdm +| 00001 | vaesdf +| 00010 | vaesem +| 00011 | vaesef +| 00111 | vaesz +| 10000 | vsm4r +| 10001 | __**vgmul**__ +|=== From c986a6ffaeddbf5b5c411ea25a50fdba263b6027 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:24:07 -0700 Subject: [PATCH 13/26] main document for vector extra --- .../riscv-crypto-spec-vector-extra.adoc | 135 +++--------------- 1 file changed, 21 insertions(+), 114 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc 
b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 768d7999..f3adb3df 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -46,14 +46,12 @@ endif::[] [colophon] = Colophon -This document describes the Vector Cryptography extensions to the +This document describes the Vector Cryptography Extra extensions to the RISC-V Instruction Set Architecture. -This document is _frozen_. -Change is extremely unlikely. A high threshold will be used, and a -change will only occur because of some truly critical issue being -identified during the public review cycle. Any other desired or needed -changes can be the subject of a follow-on new extension. +This document is a _Discussion Document_. +Assume everything can change. +This document is not complete yet and was created only for the purpose of conversation outside of the document. For more information, see link:http://riscv.org/spec-state[here]. [NOTE] @@ -66,7 +64,7 @@ link:http://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4. ==== include::git-commit.adoc[] -See link:https://github.com/riscv/riscv-crypto[github.com/riscv/riscv-crypto] +See link:https://github.com/riscv/riscv-crypto/doc/vector-extra[github.com/riscv/riscv-crypto/doc/vector-extra] for more information. ==== @@ -75,14 +73,9 @@ for more information. Contributors to this specification (in alphabetical order) include: + -Alan Baum, -Barna Ibrahim, -Barry Spinney, -Ben Marshall, -Derek Atkins, -link:mailto:kdockser@tenstorrent.com[Ken Dockser] (Editor), +Ken Dockser, Markku-Juhani O. Saarinen, -Nicolas Brunie, +Nicolas Brunie, Richard Newell We are all very grateful to the many other people who have @@ -91,34 +84,22 @@ feedback and questions. 
// ------------------------------------------------------------ -include::riscv-crypto-vector-introduction.adoc[] -include::riscv-crypto-vector-audience.adoc[] -include::riscv-crypto-vector-sail-specifications.adoc[] -include::riscv-crypto-vector-policies.adoc[] +include::riscv-crypto-vector-extra-introduction.adoc[] // ------------------------------------------------------------ -include::./riscv-crypto-vector-element-groups.adoc[] -include::./riscv-crypto-vector-instruction-constraints.adoc[] -include::./riscv-crypto-vector-scalar-instructions.adoc[] -include::./riscv-crypto-vector-software-portability.adoc[] <<< - // ------------------------------------------------------------ [[crypto_vector_extensions]] == Extensions Overview -The section introduces all of the extensions in the Vector Cryptography +The section introduces all of the extensions in the Vector Cryptography Extra Instruction Set Extension Specification. -The <> and <> Vector Crypto Extensions ---and accordingly the composite extensions <> and <>-- -require a Zve64x base, -or application ("V") base Vector Extension. -All of the other Vector Crypto Extensions can be built +All the Vector Crypto Extra Extensions can be built on _any_ embedded (Zve*) or application ("V") base Vector Extension. // See <> for more details on vector element groups and the drawbacks of @@ -126,14 +107,14 @@ on _any_ embedded (Zve*) or application ("V") base Vector Extension. All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those -in <>, <>, <>, <> and <> but _not_ <>,<>, or <>) shall +in <>, but _not_ <>) shall be executed with data-independent execution latency as defined in the link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification]. It is important to note that the Vector Crypto instructions are independent of the implementation of the `Zkt` extension and do not require that `Zkt` is implemented. 
-This specification includes a <> extension that, when implemented, requires certain vector instructions -(including <>, <>, and <>) to be executed with data-independent execution latency. +//This specification includes a <> extension that, when implemented, requires certain vector instructions +//(including <>, <>, and <>) to be executed with data-independent execution latency. Detection of individual cryptography extensions uses the unified software-based RISC-V discovery method. @@ -144,98 +125,26 @@ At the time of writing, these discovery mechanisms are still a work in progress. ==== -include::./riscv-crypto-vector-zvbb.adoc[] +include::./riscv-crypto-vector-extra-zvbc32e.adoc[] <<< -include::./riscv-crypto-vector-zvbc.adoc[] -<<< -include::./riscv-crypto-vector-zvkb.adoc[] -<<< -include::./riscv-crypto-vector-zvkg.adoc[] -<<< -include::./riscv-crypto-vector-zvkned.adoc[] -<<< -include::./riscv-crypto-vector-zvknh.adoc[] -<<< -include::./riscv-crypto-vector-zvksed.adoc[] -<<< -include::./riscv-crypto-vector-zvksh.adoc[] -<<< -include::./riscv-crypto-vector-zvkn.adoc[] -<<< -include::./riscv-crypto-vector-zvknc.adoc[] -<<< -include::./riscv-crypto-vector-zvkng.adoc[] -<<< -include::./riscv-crypto-vector-zvks.adoc[] -<<< -include::./riscv-crypto-vector-zvksc.adoc[] -<<< -include::./riscv-crypto-vector-zvksg.adoc[] -<<< -include::./riscv-crypto-vector-zvkt.adoc[] +include::./riscv-crypto-vector-zvkgs.adoc[] <<< // ------------------------------------------------------------ -[[crypto_vector_insns, reftext="Vector Cryptography Instructions"]] +[[crypto_vector_extra_insns, reftext="Vector Cryptography Extra Instructions"]] == Instructions -include::insns/vaesdf.adoc[leveloffset=+2] -<<< -include::insns/vaesdm.adoc[leveloffset=+2] -<<< -include::insns/vaesef.adoc[leveloffset=+2] -<<< -include::insns/vaesem.adoc[leveloffset=+2] -<<< -include::insns/vaeskf1.adoc[leveloffset=+2] -<<< -include::insns/vaeskf2.adoc[leveloffset=+2] -<<< 
-include::insns/vaesz.adoc[leveloffset=+2] -<<< -include::insns/vandn.adoc[leveloffset=+2] -<<< -include::insns/vbrev.adoc[leveloffset=+2] -<<< -include::insns/vbrev8.adoc[leveloffset=+2] -<<< -include::insns/vclmul.adoc[leveloffset=+2] -<<< -include::insns/vclmulh.adoc[leveloffset=+2] -<<< -include::insns/vclz.adoc[leveloffset=+2] -<<< -include::insns/vcpop.adoc[leveloffset=+2] -<<< -include::insns/vctz.adoc[leveloffset=+2] -<<< -include::insns/vghsh.adoc[leveloffset=+2] -<<< -include::insns/vgmul.adoc[leveloffset=+2] -<<< -include::insns/vrev8.adoc[leveloffset=+2] -<<< -include::insns/vrol.adoc[leveloffset=+2] -<<< -include::insns/vror.adoc[leveloffset=+2] -<<< -include::insns/vsha2c.adoc[leveloffset=+2] -<<< -include::insns/vsha2ms.adoc[leveloffset=+2] -<<< -include::insns/vsm3c.adoc[leveloffset=+2] -<<< -include::insns/vsm3me.adoc[leveloffset=+2] +include::insns/vclmul-32e.adoc[leveloffset=+2] <<< -include::insns/vsm4k.adoc[leveloffset=+2] +include::insns/vclmulh-32e.adoc[leveloffset=+2] <<< -include::insns/vsm4r.adoc[leveloffset=+2] +include::insns/vghsh-vs.adoc[leveloffset=+2] <<< -include::insns/vwsll.adoc[leveloffset=+2] +include::insns/vgmul-vs.adoc[leveloffset=+2] <<< [[bibliography]] @@ -245,8 +154,6 @@ bibliography::../riscv-crypto-spec.bib[ieee] [[Encodings]] == Encodings -include::./riscv-crypto-vector-inst-table.adoc[] -include::./riscv-crypto-vector-inst-table-zvbb-zvbc.adoc[] +include::./riscv-crypto-vector-extra-inst-table.adoc[] -include::./riscv-crypto-vector-appx-sail.adoc[] From 4ae2021a989ddc90fda47fd4db721c3d850ea322 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 28 Aug 2023 01:25:06 -0700 Subject: [PATCH 14/26] renaming vclmul/vclmulh 32e spec files --- doc/vector-extra/insns/{vclmul.adoc => vclmul-32e.adoc} | 0 doc/vector-extra/insns/{vclmulh.adoc => vclmulh-32e.adoc} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename doc/vector-extra/insns/{vclmul.adoc => vclmul-32e.adoc} (100%) rename 
doc/vector-extra/insns/{vclmulh.adoc => vclmulh-32e.adoc} (100%) diff --git a/doc/vector-extra/insns/vclmul.adoc b/doc/vector-extra/insns/vclmul-32e.adoc similarity index 100% rename from doc/vector-extra/insns/vclmul.adoc rename to doc/vector-extra/insns/vclmul-32e.adoc diff --git a/doc/vector-extra/insns/vclmulh.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc similarity index 100% rename from doc/vector-extra/insns/vclmulh.adoc rename to doc/vector-extra/insns/vclmulh-32e.adoc From 0083833fa1bb44283157d9c4eff3a8c4f6c90137 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 31 Aug 2023 16:57:15 +0200 Subject: [PATCH 15/26] fixing vector-extra build issues --- doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 2 +- .../riscv-crypto-vector-extra-introduction.adoc | 10 ++++++++++ ...32e.adoc => riscv-crypto-vector-extra-zvbc32e.adoc} | 0 ...zvkgs.adoc => riscv-crypto-vector-extra-zvkgs.adoc} | 0 4 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc rename doc/vector-extra/{riscv-crypto-vector-zvbc32e.adoc => riscv-crypto-vector-extra-zvbc32e.adoc} (100%) rename doc/vector-extra/{riscv-crypto-vector-zvkgs.adoc => riscv-crypto-vector-extra-zvkgs.adoc} (100%) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index f3adb3df..2dae82a0 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -127,7 +127,7 @@ progress. 
include::./riscv-crypto-vector-extra-zvbc32e.adoc[] <<< -include::./riscv-crypto-vector-zvkgs.adoc[] +include::./riscv-crypto-vector-extra-zvkgs.adoc[] <<< diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc new file mode 100644 index 00000000..fd7590b0 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc @@ -0,0 +1,10 @@ +[[crypto_vector_introduction]] +== Introduction + +This document describes the proposed _vector_ _extra_ cryptography +extensions for RISC-V. +Those extensions extends the _vector_ cryptography extensions for RISC-V, +providing extra feature not mandatory for a high performace implementation but which +can help further improve the efficiency of the algorithms that use them. +All instructions proposed here are based on the Vector registers. + diff --git a/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc similarity index 100% rename from doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc rename to doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc similarity index 100% rename from doc/vector-extra/riscv-crypto-vector-zvkgs.adoc rename to doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc From 21479fda3b03bb9e4786ab53972232799caf48c3 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> Date: Thu, 31 Aug 2023 08:09:11 -0700 Subject: [PATCH 16/26] Fixing reserved encoding description for vclmulh Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> --- doc/vector-extra/insns/vclmulh-32e.adoc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc index 6f536542..c90d8b5f 100644 --- 
a/doc/vector-extra/insns/vclmulh-32e.adoc +++ b/doc/vector-extra/insns/vclmulh-32e.adoc @@ -36,8 +36,9 @@ Encoding (Vector-Scalar):: ]} .... Reserved Encodings:: -* `SEW` is any value other than 64 (`Zvbcb`) -* `SEW` is any value other than 32 or 64 (`Zvbcb`) +* `SEW` is any value other than 64 (`Zvbc` only) +* `SEW` is any value other than 32 (`Zvbc32e` only) +* `SEW` is any value other than 32 or 64 (`Zvbc32e` and `Zvbc`) Arguments:: @@ -95,4 +96,4 @@ function clmulh(x, y, width) = { -- Included in:: -<>, <>, <>, <> +<> From e96eabc7dea0916823f4d11ba29359c6e1b4800e Mon Sep 17 00:00:00 2001 From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> Date: Thu, 31 Aug 2023 08:11:10 -0700 Subject: [PATCH 17/26] Fixing title page Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> --- doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 2dae82a0..322450b9 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -1,10 +1,10 @@ [[riscv-doc-template]] -= RISC-V Cryptography Extensions Volume II: Vector Instructions -:description: The vector cryptography extensions for the RISC-V ISA. += RISC-V Cryptography Extensions Volume III: Extra Vector Instructions +:description: The vector extra cryptography extensions for the RISC-V ISA. 
:company: RISC-V.org -:revdate: 08 August 2023 -:revnumber: v1.0.0 -:revremark: RC2 +:revdate: 31 August 2023 +:revnumber: v0.0.1 +:revremark: :url-riscv: http://riscv.org :doctype: book //:doctype: report From 8b5dab8c97008de310e86e6a65f9e3f68f4d210d Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 31 Aug 2023 08:16:31 -0700 Subject: [PATCH 18/26] Fixing Zvbc32e description --- .../riscv-crypto-vector-extra-zvbc32e.adoc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc index 7bd8e84e..a5b6af26 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc @@ -1,18 +1,23 @@ [[zvbc,Zvbc]] -=== `Zvbc` - Vector Carryless Multiplication +=== `Zvbc32e` - Vector Carryless Multiplication General purpose carryless multiplication instructions which are commonly used in cryptography and hashing (e.g., Elliptic curve cryptography, GHASH, CRC). -These instructions are only defined for `SEW`=64. +These instructions are only defined for `SEW`=32. +Zvbc32e can be supported when `ELEN >=32`. + + +Note:: The extension `Zvbc32e` is independent from `Zvbc` where the same instructions are defined for `SEW=64`. + When `ELEN>=64` both extensions can be combined to have `vclmul.v[vx]` and `vclmulh.v[vx]` defined for both `SEW=32` and `SEW=64`. 
[%autowidth] [%header,cols="^2,4"] |=== |Mnemonic |Instruction -| vclmul.[vv,vx] | <> -| vclmulh.[vv,vx] | <> +| vclmul.[vv,vx] | <> +| vclmulh.[vv,vx] | <> |=== From 34a114ea18ff699d66db210219ff18f4bb655663 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 31 Aug 2023 08:18:55 -0700 Subject: [PATCH 19/26] adding vector-extra copy of images/risc-v_logo.png --- doc/vector-extra/images/risc-v_logo.png | Bin 0 -> 11962 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 doc/vector-extra/images/risc-v_logo.png diff --git a/doc/vector-extra/images/risc-v_logo.png b/doc/vector-extra/images/risc-v_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..d754746ed2a3bbd7ad9b4aae9081e0e044493ee0 GIT binary patch literal 11962 zcmch7WmFq$+iq}or?|VjyIaxV5VUA;cPZ`^Dbgavg1ftWk>XlPaVak4(C+=d-*?ve ze`c-8W7l=-Jeiq0iPlh)M?)q?1^@tPiV8BC007kDYu^$P{`FJX87m0@Kvj4rEv=y_ zElr`}>TLba!3qFSh)&l-0BVim=NW1$+J+!1AazD-7AYfrmc>kBT0&H$_&kt=DRay= z&eYlEqZ@^>+gmeRSn^HzT&| z`@2otSZu9%L5q1Q7gyAQ6-@3uL8P~KZzjF~2cn7Uk{Nf>?^h*`>CpxCw?K#7`)NAyq5$|A)(aT~iP+>+!me~&rg{RP{ zb^P}ACiVQ?_j7XlQ145 zO06S4VIGQO*e~c2B$Z{eYWsW-9>&}cVMsy=V%9cE*|r_s!kUkI!OF#On~abayj3TC zV^_dSJc!AzY%`KVD2GZbhut5Y(1Ju3^|(8GPK^3q_%!Sh-Rdzzber>Y>Ul6Yenz&1 z*SJIBMpcxB1IoP@aXUMkyQ_+*ISi8LjYtN#r{td+Pz^>A!SeDjAT z&@sRhUpP89)(bV6I*!(|Vyk_pec}=Q zwNXfo!hIlY4$)VAE|(9b!Gn>`ft0b#+lgd@u3jEqFcSBqZ@a&kaO-R$g!^o>8~;WE z_>X_$I$|fCCJydTgfi=RROU07JHx;chNh7 z#S!7n%|xQ$qABoH!i@2IuUTNYdr4|A(?VTqpdoNNW;jd8o}r><;!r zyx=!gc~0jfS0T-207*=2DS5&uPEg1a)l`)5P8ckexdIP%ViDDW0zQ=_?g+_u;UW_K z2yIHkB%$v%-#J{d1ryz=nv)(CFfNhrqd*L3yb_Zd!tSVtVRf?%JXpCg)}QZfISoSD zq?u<8-1%3c@?_WxzRj>*39ckUsi0VZK4hZ&2y>|cvY?F*ST2b`P%C=uYT+Spzo+$T z*dPRt!8k7PQ+>u;$O!|fV6nW!>HdrzojbJ-Eb(~L-m~3+I|ArjGbJ$H9ac-YVmM2m ziWKAL(3)Iu6c}Dibv?XUhALUHf!?>jgfl%;HAmG*X-#5HSw!-fMvUerRe{DLg_|)L zS9vEHS)sV-x@f&8oEI{&MKC8ajPRGVOQS@5q;Imz+C{9#I>ze5QkLV1YZ2MW zomD6_qyGFhYu2RDUXDCof5?1DaELknW87Kx7M&U$j;eLG1COA^SK+T_MK21!N9vLv 
z<$pWHWD8{teVpQ3w_CAZakh5CH>cw@boLyBe9L)9=8)&uHT0C}lL*&OH6!IvJG9d> z$o+vbc5=*0r%tW@fkl;-nicPZAfO$}L1J^p{tZTS#czmU1(1$8DQ$Un8F+ArC!ISj%RN>9emFIGVb?P;XF8TW5 z@!Bbi2n+s%&V$kOoUQvXn~HrKc4zi1y)eDR=6J6*ulQRua+rL9{9>_vf21zTu3rI( zcY)`>E}73)&u&Y)V}lZ2*j`F@kB5=3;}T20QK+?Wi++99Sw5yZVr*d5Lq7sQH*kPH71(e(b{J#BUOox&US`931R?tG`PvHwV zrvpnv3ZQI|Bn!W%$ML$rW6M*EDIGTbkSZkaPGEyIQ0-|MR<7?V+!)CjM;beWenZVw zL6fGD?ubTRu}V2G13SBpso4H&rlK3Yu?nvuUI|}G^4FBPrjl!Q^p72$uD7_;jKx}2 z>W%pZJl9V=sqb3n64btBO??cc$W6*i>0*AyVgH~vel)#hJ#Tq+A^7C+v>$5LODvhl zAtiC~eCH*X;o3C3PpLUf^{=3%RBpf`rf8z^S!@s)4ln@9sz6-Rz zoPd$_l}#yV(pRhhFyX67V)8WS;LjK2c|zJ{ z*!tD{z}(f)r~WEXQCqDzKfjQ(vgYIcw7rP`@%&uQ;9b^=e`cxf{NZ^F>;E>gpovfpmuxt)~4!q8=XtZthxx-Q=epbH)98_oe1! zt7GFG^Bms?{(3smXxH=Gr9D%gB<&()&D|ABG)8apzf}5}Zd&bHZPJ17R{6{K>-9(e zFKa*B7+dbIbWbZYx(R-Zf&F?(`*Q2r+q{@hvo@2L=UF6m7J<4UWMPfi+p^7JhoolOE@2p`v%Q-fW2MLr?6aD{UXoG?^V-uDM%FJ)eW`-s#Y=$p`jB*nfMEI&O2Q6mtKI<&HtXNo z+IQqwHU#91=;gDgONHBnO(rPKZ^*%Ij&(yd9t0XVFUj4#zFHuG74+Qz0DRg%CzPTl z&H3w_NA5dqpgT}iMaaV0kIl2Ao)~kc;e^%Hz**Mt$EBloy@@G~^DCtrvCphIXV6>=Kosxmnp*j$ASOl&_B`oJNp_g zF=P?;|At-+`7F7T=rtQ8?_|`qUt8!usd+uPUQdR9+t)s3`moePz-w!+C?l!u4Rvgc z;*KkeACQ_gp+Zp#3x_RXjENXUb(tI#EHh7Shv^d5UU1CBP}W=vk&Nb+eLVZ??jb=jA6oSQ~;uG*&Drj5ILYBWpbwxV3G z+3vx!)AxD|4AMwaoW*{9zI)cGJs{T&ZUK0M% z@fV6q)iY3zslpcXSM8rE&>rVstNuV4WP+o0C^L<4|1tD+YGBOb9}8ch8c_h83Mr0O zhQE?t1@?#y5DOt+zi*QK`JQ#hE%t5=9#iDFl+5*hMQYLZGe7-5z>o&nSL8|yN*^U+ zKs|b>sR+jBYG%NIS35nOZu>3f(eU4>Tq7=J0I63^9!cj<452;Rf5@6NuD_^2nvSq_ zqyCs1QbfIl3VMe7aGtFG$@&*>aK5LC3dHjmZ!!As@^`8&22lD8tXMW$Z)faxy;c7< z|C0gYBi+@X{W1RrgN7VR9-FyOyZI5zoS02r48&>ooyZ;)e24$n<#(aM7SVW*^yj@Q zh=IRz9NS`|7x;{O|6wO_!IG##q72L)Co!Kc2h*Sk^*M3uX^5Z1|8ftagLk9xq;xcl zIbr)6H@|csWC67{;2{5`g;Boeet?MrHQ-uk-YF}^mqSoOX6!K-?{8)(77zziA)*gz z>Iz=@(=dXS^^M(5#%|_IpZp1h1jr4n9(S3VJ?CVwGhZLcQ z+miywM)@JY!S9S)(?=TK6DDI^Hl-NxL>!EW?tcvK(hM2kU*Zlj26&~F;A;!uwyTx-`*zhU>+liZq*tW?FCr|Xdz|kp(!qBM z@$m*(nM}Ia%<~AJ9!@@ER?!($tp(b?)kfNC+!#wjwEN6%8o6@9pB#Ih=fdu-)+to} 
zG~9mEA3v8L@!aQ{_KnPR(eIru?9-$!WE~0t{Wsvgua2VxLi3rxyDYde8ZX`da@PKn znJq?{^RfCirO~xq^AZ>ShUyKmz0d^ky3Kw{l$PTL^7H3v}=<8kPl zHYT&!8W}?n+=uG5wU#^Ui*-#99%4k7ue5-(8lA=70S{S+ji}TI^)|Cox#KHB7EfP) z>2hM#9)0RTN!Cx!R#KXsc9%Gq_}ImBu^#!P0;#!LUC>}C+lzuSq|}O&Xx{O&h;OEC zq^_&^640&I{csvDa|ks03sNE$I`OtwmvknaMjt!nG6ZfC(g$M1$G+;1CscB%cF`(9 z><1h^H#oej52UQk2D+3kJlR78oKk;-cG!VvM8nq&dkmGU_n7l1K5>wbK_kjZ$!-Z% zB3%u5@vVAqOxe4^vDPPc#~D=LdiG@CGV&KY__Ufu4rNzynEAe6DkJnkgojgCaF<*N zV8#7D&{P;iMeFbu)Q-2i_Hi(DrvIq1#;g6pT?RU%k%AOkjc1>*@Vb7d3j&QQg?}m$QkvDN2K5MT_r1E7q%?_w4p571ySHH}?l_lt|k*tp#abX(eHiT4q zj!cFa`w^@BtV^_BtH;&tXoB0Rjge*AMg+-5@K>>g;jA{q%p2dn%!KHD_B_&F_tQ1C z1T_+Gf7#Ot0_b8^JSFojFa=bNAbiNe-(`VTbyM?FdVO3MYbd+C7=!a%W2M8JPO5)= z5P?5U$Vk~sr3AL`37M}1#_Q@t64c3c!c;*zN-qa#_lSL}^nFVB4AZ;y1)XL=uDg}f zc2mi>)7b!9zOJhnACB@Zcc8!-#@lh;wrxUCfY}Xf%lE!Oxs#CdX>#f72RF1m2qw5$ zn7|(? z$G(nt(T#=~2-I^Ih)UPSIm&5e=E2wkdVJ{@y$>#E;3ZYWb5SI&jcJ=Wjr){@Q|zoW zqFqhOIJL4N%@p&X8BDYMay(V@{UDN6&UboZ$f6~vHy149T^!WG`+IJVL>kgLEdcjH zRNk$-%3wKpUXUrmA0`jRtDP!cmiR+4zXB>ZuNG9{`wS(}pd+O$!QczuYEIjy3u#8( z)tRT&wcG2%#!Xv?!#&0C29po78ZW=(8>kR}Xd6gIE5hAAt2(52gc(Jh9m8#%ax7|e zT^wtPal1suyzT&tL61d+3cX$ZDsx&P=mg3|Pshaf9?lz;bKzey%6~5_XG#Y;`1Tht z=V?{Jo@Lz{YWrS+_9&v+lEu6@f?=zYWt5qsb%bPODwuPtw4!-)ZSF?t79Yqr!?NQo zM*3%R{V4AqL}fqdvDtnNcO1H~TVb8jmN0^)En4_r4tcmDh(zR>ng!ST=j*iHh(AFm8YN&a3NS;Z@se~sRr(iuK#^`bFeeY+=6Pk^dksDIf zd)n!(^jwX{Tex`T7vZPj8qyzhzIjVO;M2>u6->@rv}-m@{-YxG2=6~{deLp zS6UVp4d0({E0UaxeJ`1!@l<8g!i(Budad1*j6Kt%zm1~Upx#OKSrVI0gTQd8bpWa* zXj)!aeV{Uy4iiq*gONiu5fmnR%Gh;|2vkQWIo+rdrQ!$M-=xfa_@}se-S?myrd|GB zw;weWK%*}b@~^vAc^LD^N4B%^t`{3pn%@Tt$qFq*E^SX#`%NTcvLKV_Q~!-SGK zl~F7zs7s6HW4#QkXL{MRbv6?9)f95}5j{dQ!yCWdZym(G339%MwfIi!V?`1dnv_q1S zQ@xqGHC61lKB$94sU?J2kwsLouF{M0LoJlbB}T4|E^aoaBw9nWZTggvaFAK+mlM)_ z&@DQrU!~)}p>%R(5nK`h0~Rrso&|5iI#El5~skMbzrmQIVamcNL*h?mF8C zfjyzAhW)hk%ndyyN?oOHLAVm{E66&Zs*JqmtWoATg}p zV!L$IZ&Bpjy)B2yiCNGP)4n9Z(d=%^pHT40osg@v-6xoX0hVpb5Nl{AccG?nDBs?M zJ=Tq*mJi^-V<^kGFp%xEWHCTY15_#i?S6k=z$J;Z)O_6wyNJU$2MW#WeFaennY)rd 
z($ZXv)O%(k)Ktf!uD={jzwfenj*JW2Jd4O#T%M_wMi*4)krY`X!4PPsdb3q zcMT=SI9|pzUkNOa#=D5Isaz-%DBSQC*@VWz8y%UA5@IV0*teVj9poW{WCNgAog=q0 z*oQPvGX1PUzsOQKca+!jhGlR|-&0i${Qf5HKx|7)~|EH59({z2~$I-BTvsK+GL(Mw#b0XKF{6~sktsqkM*++w15U*{TC z09_CaF)FB_g2jYW#l+Q?k-1QaB8Y|_wWP~h_AN|RoOZwMI6M{fy*$i6DEHM_e?GAc z)Y7Ro5q_cy37PMj^Omg;{*#OUelBoD4$xzrM@Gm!K=wFcPWxNf2l z5cJ)XuJq96PSZ4VS^yhyrDVJU8m)t}gTI)Ar9d?F-kj4T*F06z#*F zV9Vc%t3&X-=bIADsDZ%I`8cAn z7^Xa0-M*NB!WOmMR{X`THk^7iDpnsm8uwPzZGf-N?agN#^^)bwd$|WR zfnoc=UrL-40f(v#tjrkr#mD=CHv2u
?)Ta!C4k|u#yvl>Ubljge>7_c9CTc*R2 zD?Xh(9*~01X7YRxgD9#z;kJa@DLR1sXB)n(TNF_#B+G&!vrGaxxW9^;YE^*mQIFAI z2<2a_Kyfh`9SA={yk1lQL6x;^1r18VpQ$a4?}>w{MPsZ=eRqoRYODiXK1rNq_?JfT z%Fic<@Y*OFv8!fVN}Ajza9GQz+vd*o%|r}H1H%X+q1#=?O&A`wC`g2VpyQXU?WZ9; zO6R{3uGrAK(@ZW;W-#i609KmqD|K?e?nUG4h8r655ZZNb%IvbpRS4J>tIaL?-*S0- zCKg_^Lb!y*Q4Z*DX;$g$KU?+1@C!c9O3rYfg=Vf*cV8No;$w_@eSglpBL9iIf7{*( zmoVfwy1%5+f-tnvUNj{}BoARwIT=Kxep^=5VC0L-5AI2a!WukU%yvy{&G$yOe?ICX z7tah7J&c?dV=psJcOQPkAH>qA>+jKw&T&?|s&iI2jE0e#Cn&ZbjpJK!lYAJ_mA?VE zdBM)wK6!=Ht`y+49qqzISQ74Hmze_2#rtMh_{D%<+vym3OAvbj7wxNo5wyM@glc)E zcSO_NqxGx#vYVIWu$y}0@y7Eodgy)+4k$UuaYC-FH+E7Wh*;o-M5u^jX-LZ8d>;!sqv&QCu=Z?GeR?jQB*s*Et+O|s34mUwOQ%7CeYbL#5e?6meo+j{+J~0Z_7B1NL6#NBaGiPru*^IEIqmX?Q_$yNB{E(kBaj$ z3|D)&T;A+T0k_9Xw>Xra*l#)WvA*)2w;8<$v(c$~^Lva4x@tIeHJVW`*4(lJAXek9 z_<0HY1L+?=hR@V7?%NKyR20H(PyMY)3y#vI0ZYl<<+$a1#%g46!mAtTM5N@%M}r&s z;}Li+pDg-g1}!MLwVbeCYduh$o83_7y#w5EA{(wKP)3s@CAsxmXqnyTm56@$i-tJx6P5JV%Hk0 z?>7+}r62p+4#9zx4c=T=YAy`+4+Q^2BbNS#H1ji?D)?%M$L}L>PzHFh;Z}VJ_(iT~ z8Z-;qW!{84^>U_r`8sJX?;FvHYB4crjgx#n>~b4@*G^mW*|vE3z-YNWlh0;}?3hRcul4DE5^>3#(`zE0?9yPVf`m7p?4+!>M~ZLF@**Oc%{o z@-3mD*bC^Kbi~1=E^32oyvn`6Gi3SK&%1v1PP<4e7d8%v-eTDR-c8H^%7MVSpJje*HhaXes@7!s^1+VGT}vL ziSyN*n(d6SM9ycF+iqAOZQh4W2LJwF~7NCl}^C&>%mRy`k|DtdqkWFyuP6Bdf)m05-#eK8s{(%uWPyL%!L0c2QpHT-vhvt*>%P{Pd|Na8fy#GC_w^&w@NttH0&jHuAdmENrm#Mkuu&NpDJO2}$`tPWNU&=2|!?Gw`cUa1iRew&4-V0MrnBzHRjDCi$$< zc}z^w6vropd|f3{rv!{o=t& z!Ev_p(XLir-MjJa+|}}t`RrF#8aI4h>@Pypb`&9IFh*hA*c!nRyb3dI>O>`VkxyX* z34Kg&=SO`^ukDx%ET5M-t-+PYVyi7O!L85 zNpdhhKj73}KZ@zWc+tu7rqU-LmeZ_7x;C-y8LkTzt2o4*bY zq3~Kmima|?5yo*R>97X(1h?gfknd9g{Pv8cjX4~aJmK!|hl-%68wHOeD;pFK;d)7{ zzHq$dEkp3Rqkv{!AMY^ql*;d2)8vb`6?NEeS%o7vceMVoQR#=3Wwgym#C0|BiX%b9 z7i7f=c|hP^zPvXI?m}5ALQ>#Jc(cyyJs4|tjY+To=R4I`hx9vz6AXN zDD(js{DeX@T3T%&Rvo4KcKO)=Urt`paZ3J|FZ`j$-@tMJDdnolnUtg z?n5K^BuW4;eBA^2&$bfJP=&TK6ITp zjNy2SZ!U0$lHL%kqL*V$cI+1kq-UqXG=pH0y;4YM%P1fUU+eb+ zOYeQlaObrX5FiLt@51p!BlJ3f0GkHKGOtc7kBw*Up2zzjs+!5gujYRk8bA 
zEo$7tsPO&bNLJh{(k%MWUk$yAzesj`C>5z8-gETL4)7HjcnZGAvGEh;P>-bXD^Zoa zpa)9Pj}xw2H9j?N{GtpUy2BT^x|Q}@u5>-HnE)IGl8_>7-2t-^t)+eR4x?{+7jJ_t zb-#HeEq35!9rg`IoG;~k8@cg~{PS(xl7PekMVIsdo1zB9YQ<7FzkB`TU{?Y|VSm_n z`uAE1&FDF3gtnV81ATUIvZrBxz-FexN5qYeW>U!T(F0X~&rB#w}I+NAlu z6$W7#T{cqs6Gjyuqc_O6JxhKW8Y;yOKwvSrmJ`PeT7_Z{k*2YR%CRu7&sY5rtPl77 z_9OamqTW#!QMq!usC}eU-WyJ+zfqv+tl8m~Qs3T-J%uls%l=6U6FCuraZ3_2y1S2M zuTtY$@<%b$DrQYY4b|k+k#O%-(C+xuWU(x}L&QO1H>e6Hcaa4m1a67>rasNl$1eTdO?{K0a_rG`zOfz$xtu565i2MJp{!Tn&pQ1!a&RF2}bswN8t0q$= IWfuB>05)+qbN~PV literal 0 HcmV?d00001 From ea3471aa6ab8d577fec34e369ef6d667e09ce441 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 31 Aug 2023 08:21:56 -0700 Subject: [PATCH 20/26] vclmul description --- doc/vector-extra/insns/vclmul-32e.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc index e1874bf2..fbd9e886 100644 --- a/doc/vector-extra/insns/vclmul-32e.adoc +++ b/doc/vector-extra/insns/vclmul-32e.adoc @@ -69,9 +69,9 @@ significant SEW bits of the carry-less product. [NOTE] ==== -The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. -We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the -instructions in the `zvkg` extension and can have utility in other areas. +The 32-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. +In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented. +It can also be used to speed-up CRC evaluation. 
==== Operation:: From ed8f89e9207e200537374ac9c8e2569fb0ee20e5 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Wed, 17 Jan 2024 16:42:08 -0800 Subject: [PATCH 21/26] fixing a few typos + clarification --- doc/vector-extra/insns/vclmul-32e.adoc | 8 ++++---- doc/vector-extra/insns/vclmulh-32e.adoc | 8 ++++---- doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc index fbd9e886..ee46d468 100644 --- a/doc/vector-extra/insns/vclmul-32e.adoc +++ b/doc/vector-extra/insns/vclmul-32e.adoc @@ -49,9 +49,9 @@ Arguments:: |Direction |Definition -| Vs1/Rs1 | input | multiplier -| Vs2 | input | multiplicand -| Vd | output | carry-less product low +| `vs1`/`rs1` | input | multiplier +| `vs2` | input | multiplicand +| `vd` | output | lower part of carry-less |=== [NOTE] @@ -60,7 +60,7 @@ Arguments:: ==== Description:: -Produces the low half of 128-bit carry-less product. +Produces the low half of `2*SEW`-bit carry-less product. Each SEW-bit element in the `vs2` vector register is carry-less multiplied by either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc index c90d8b5f..e10c38c2 100644 --- a/doc/vector-extra/insns/vclmulh-32e.adoc +++ b/doc/vector-extra/insns/vclmulh-32e.adoc @@ -49,9 +49,9 @@ Arguments:: |Direction |Definition -| Vs1 | input | multiplier -| Vs2 | input | multiplicand -| Vd | output | carry-less product high +| `vs1`/`rs1` | input | multiplier +| `vs2` | input | multiplicand +| `vd` | output | upper part of carry-less |=== [NOTE] @@ -60,7 +60,7 @@ Arguments:: ==== Description:: -Produces the high half of 128-bit carry-less product. +Produces the high half of `2*SEW`-bit carry-less product. 
Each SEW-bit element in the `vs2` vector register is carry-less multiplied by either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc index a5b6af26..6881c8dc 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc @@ -1,4 +1,4 @@ -[[zvbc,Zvbc]] +[[zvbc32e,Zvbc32e]] === `Zvbc32e` - Vector Carryless Multiplication General purpose carryless multiplication instructions which are commonly used in cryptography @@ -8,7 +8,7 @@ These instructions are only defined for `SEW`=32. Zvbc32e can be supported when `ELEN >=32`. -Note:: The extension `Zvbc32e` is independent from `Zvbc` where the same instructions are defined for `SEW=64`. +Note:: The extension `Zvbc32e` is independent from `Zvbc` which defines the same instructions for `SEW=64`. When `ELEN>=64` both extensions can be combined to have `vclmul.v[vx]` and `vclmulh.v[vx]` defined for both `SEW=32` and `SEW=64`. 
[%autowidth] From 1bfa9e36da5c5d23c4f7f18cf6bd4659aec201fb Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 1 Feb 2024 09:51:13 -0800 Subject: [PATCH 22/26] [vector-crypto-extra] typo fix and improvements --- doc/vector-extra/insns/vclmul-32e.adoc | 4 ++-- doc/vector-extra/insns/vclmulh-32e.adoc | 2 +- .../riscv-crypto-vector-extra-inst-table.adoc | 2 +- .../riscv-crypto-vector-extra-introduction.adoc | 4 ++-- .../riscv-crypto-vector-extra-zvbc32e.adoc | 4 ++-- doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc | 10 +++++----- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc index ee46d468..1c24c15d 100644 --- a/doc/vector-extra/insns/vclmul-32e.adoc +++ b/doc/vector-extra/insns/vclmul-32e.adoc @@ -51,7 +51,7 @@ Arguments:: | `vs1`/`rs1` | input | multiplier | `vs2` | input | multiplicand -| `vd` | output | lower part of carry-less +| `vd` | output | lower part of carry-less multiply |=== [NOTE] @@ -70,7 +70,7 @@ significant SEW bits of the carry-less product. [NOTE] ==== The 32-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension. -In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented. +In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented. It can also be used to speed-up CRC evaluation. 
==== diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc index e10c38c2..3800452d 100644 --- a/doc/vector-extra/insns/vclmulh-32e.adoc +++ b/doc/vector-extra/insns/vclmulh-32e.adoc @@ -51,7 +51,7 @@ Arguments:: | `vs1`/`rs1` | input | multiplier | `vs2` | input | multiplicand -| `vd` | output | upper part of carry-less +| `vd` | output | upper part of carry-less multiply |=== [NOTE] diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc index 01c1bd23..d52d3ff5 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -27,7 +27,7 @@ The new/modified encoding are in bold and underlined. |100000 | | | | | 100000 |V| | vsm3me | 100000 | | | | 100001 | | | | | 100001 |V| | vsm4k.vi | 100001 | | | | 100010 | | | | | 100010 |V| | vaesfk1.vi | 100010 | | | -| 100011 | | | | | 100011 | | | __**vghsh.vs**__ | 100011 | | | +| 100011 | | | | | 100011 |V| | __**vghsh.vs**__ | 100011 | | | | 100100 | | | | | 100100 | | | | 100100 | | | | 100101 | | | | | 100101 | | | | 100101 | | | | 100110 | | | | | 100110 | | | | 100110 | | | diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc index fd7590b0..c01afa59 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc @@ -3,8 +3,8 @@ This document describes the proposed _vector_ _extra_ cryptography extensions for RISC-V. 
-Those extensions extends the _vector_ cryptography extensions for RISC-V, -providing extra feature not mandatory for a high performace implementation but which +Those extensions extend the _vector_ cryptography extensions for RISC-V, +providing extra features not mandatory for a high performace implementation but which can help further improve the efficiency of the algorithms that use them. All instructions proposed here are based on the Vector registers. diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc index 6881c8dc..9cf42177 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc @@ -16,8 +16,8 @@ Note:: The extension `Zvbc32e` is independent from `Zvbc` which defines the same |=== |Mnemonic |Instruction -| vclmul.[vv,vx] | <> -| vclmulh.[vv,vx] | <> +| `vclmul.[vv,vx]` | <> +| `vclmulh.[vv,vx]` | <> |=== diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc index 40787c63..f54683f4 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc @@ -6,11 +6,11 @@ Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and Galois Message Authentication Code (GMAC). -The instructions inherit the same constraints (element group size, data independent execution timing and `vl`/`vstart` multiple constraints). +The instructions inherit the same constraints as the ones mandated for `Zvkg` instructions: (element group size, data independent execution timing and `vl`/`vstart` multiple constraints). -All of these instructions work on 128-bit element groups comprised of four 32-bit elements. 
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements, in element group parlance `EGS=4`, `EGW=128` and the instructions are only defined for `SEW=32`. -To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing. +To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing. The number of element groups to be processed is `vl`/`EGS`. `vl` must be set to the number of `SEW=32` elements to be processed and @@ -25,8 +25,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`. |EGW |Mnemonic |Instruction -| 32 | 128 | vghsh.vs | <> -| 32 | 128 | vgmul.vs | <> +| 32 | 128 | `vghsh.vs` | <> +| 32 | 128 | `vgmul.vs` | <> |=== From eff2e907976e475db69257708ce38d76130cc6dd Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 1 Feb 2024 09:59:24 -0800 Subject: [PATCH 23/26] [vector-extra] updating revnumber to v0.0.3 --- doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 322450b9..040573e8 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -2,8 +2,8 @@ = RISC-V Cryptography Extensions Volume III: Extra Vector Instructions :description: The vector extra cryptography extensions for the RISC-V ISA. 
:company: RISC-V.org -:revdate: 31 August 2023 -:revnumber: v0.0.1 +:revdate: 1 February 2024 +:revnumber: v0.0.3 :revremark: :url-riscv: http://riscv.org :doctype: book From 4ce6a83830f9241eb45d7258684310ec7e499f09 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Tue, 6 Feb 2024 20:14:19 -0800 Subject: [PATCH 24/26] [v0.0.4] applying internal review feedback --- .../riscv-crypto-spec-vector-extra.adoc | 33 ++++++++++--------- .../riscv-crypto-vector-extra-inst-table.adoc | 4 +-- ...iscv-crypto-vector-extra-introduction.adoc | 6 ++-- .../riscv-crypto-vector-extra-zvkgs.adoc | 18 ++++++---- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 040573e8..3dfd4ce5 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -1,9 +1,9 @@ [[riscv-doc-template]] -= RISC-V Cryptography Extensions Volume III: Extra Vector Instructions -:description: The vector extra cryptography extensions for the RISC-V ISA. += RISC-V Cryptography Extensions Volume III: Additional Vector Instructions +:description: The addtional vector cryptography extensions for the RISC-V ISA. :company: RISC-V.org -:revdate: 1 February 2024 -:revnumber: v0.0.3 +:revdate: 6 February 2024 +:revnumber: v0.0.4 :revremark: :url-riscv: http://riscv.org :doctype: book @@ -46,7 +46,7 @@ endif::[] [colophon] = Colophon -This document describes the Vector Cryptography Extra extensions to the +This document describes additional Vector Cryptography extensions to the RISC-V Instruction Set Architecture. This document is _Discussion Document_. @@ -73,6 +73,7 @@ for more information. Contributors to this specification (in alphabetical order) include: + +Eric Biggers, Ken Dockser, Markku-Juhani O. 
Saarinen, Nicolas Brunie, @@ -95,26 +96,28 @@ include::riscv-crypto-vector-extra-introduction.adoc[] [[crypto_vector_extensions]] == Extensions Overview -The section introduces all of the extensions in the Vector Cryptography Extra +The section introduces all of the extensions in the Additional Vector Cryptography Instruction Set Extension Specification. -All the Vector Crypto Extra Extensions can be built +All the Additional Vector Crypto Extensions can be built on _any_ embedded (Zve*) or application ("V") base Vector Extension. // See <> for more details on vector element groups and the drawbacks of // small `VLEN` values. -All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those -in <>, but _not_ <>) shall -be executed with data-independent execution latency as defined in the +As the instructions defined in this specification might be used to implement cryptographic primitives + they may be implemented with data-independent execution latencies as +defined in the link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification]. -It is important to note that the Vector Crypto instructions are independent of the -implementation of the `Zkt` extension and do not require that `Zkt` is implemented. -//This specification includes a <> extension that, when implemented, requires certain vector instructions -//(including <>, <>, and <>) to be executed with data-independent execution latency. +If `Zvkt` is implemented, all the instructions from `Zvbc32e` (`vclmul[h].[vv,vx]`) +shall be executed with data-independent execution latency as + +Whether `Zvkt` is implemented or not, all instructions from `Zvkgs` (`vgmul.vs`, `vghsh.vs`) +shall be executed with data-independent execution latency. + Detection of individual cryptography extensions uses the unified software-based RISC-V discovery method. 
@@ -134,7 +137,7 @@ include::./riscv-crypto-vector-extra-zvkgs.adoc[] // ------------------------------------------------------------ -[[crypto_vector_extra_insns, reftext="Vector Cryptography Extra Instructions"]] +[[crypto_vector_extra_insns, reftext="Additional Vector Cryptography Instructions"]] == Instructions diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc index d52d3ff5..ee5a09c6 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -1,9 +1,9 @@ [appendix] [[crypto_vector_instructions]] -=== Crypto Vector Cryptographic Instructions +=== Additional Vector Cryptographic Instructions OP-P (0x77) -Crypto Vector instructions, including Zvkgs, except Zvbb and Zvbc +Additional Vector Crypto instructions, including Zvkgs, except Zvbb and Zvbc The new/modified encoding are in bold and underlined. // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc index c01afa59..8d057e6a 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc @@ -1,10 +1,10 @@ [[crypto_vector_introduction]] == Introduction -This document describes the proposed _vector_ _extra_ cryptography +This document describes the proposed _additional_ _vector_ cryptography extensions for RISC-V. Those extensions extend the _vector_ cryptography extensions for RISC-V, -providing extra features not mandatory for a high performace implementation but which -can help further improve the efficiency of the algorithms that use them. +providing additional features not mandatory for a high performace implementation but which +can help further improve the efficiency some algorithms (e.g. CRC, AES-GCM). 
All instructions proposed here are based on the Vector registers. diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc index f54683f4..c8d83965 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc @@ -1,16 +1,20 @@ [[zvkgs,Zvkgs]] === `Zvkgs` - Vector-Scalar GCM/GMAC -`Zvkgs` depends on `Zvkg`, it extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`. - Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and Galois Message Authentication Code (GMAC). -The instructions inherit the same constraints as the ones mandated for `Zvkg` instructions: (element group size, data independent execution timing and `vl`/`vstart` multiple constraints). +`Zvkgs` depends on `Zvkg`. It extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`. + +The instructions inherit the constraints defined in `Zvkg`: + +- element group size (EGS) is 4 +- data independent execution timing +- `vl`/`vstart` must be multiples of EGS=4multiple constraints -All of these instructions work on 128-bit element groups comprised of four 32-bit elements, in element group parlance `EGS=4`, `EGW=128` and the instructions are only defined for `SEW=32`. +All of these instructions work on 128-bit element groups comprised of four 32-bit elements. -To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing. +To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing. The number of element groups to be processed is `vl`/`EGS`. `vl` must be set to the number of `SEW=32` elements to be processed and @@ -25,8 +29,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`. 
|EGW |Mnemonic |Instruction -| 32 | 128 | `vghsh.vs` | <> -| 32 | 128 | `vgmul.vs` | <> +| 32 | 128 | vghsh.vs | <> +| 32 | 128 | vgmul.vs | <> |=== From acaf911873fddcd0e240532ccfb0622c73ff78d9 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> Date: Tue, 6 Feb 2024 20:25:50 -0800 Subject: [PATCH 25/26] Apply suggestions from code review Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com> --- doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 2 +- .../riscv-crypto-vector-extra-inst-table.adoc | 2 +- doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 3dfd4ce5..53da28cc 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -113,7 +113,7 @@ defined in the link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification]. If `Zvkt` is implemented, all the instructions from `Zvbc32e` (`vclmul[h].[vv,vx]`) -shall be executed with data-independent execution latency as +shall be executed with data-independent execution latency. Whether `Zvkt` is implemented or not, all instructions from `Zvkgs` (`vgmul.vs`, `vghsh.vs`) shall be executed with data-independent execution latency. 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc index ee5a09c6..8bd81a2d 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -3,7 +3,7 @@ === Additional Vector Cryptographic Instructions OP-P (0x77) -Additional Vector Crypto instructions, including Zvkgs, except Zvbb and Zvbc +Vector Crypto instructions, including `Zvkgs`, except `Zvbb` and `Zvbc`. The new/modified encoding are in bold and underlined. // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc index c8d83965..99155dc5 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc @@ -10,11 +10,11 @@ The instructions inherit the constraints defined in `Zvkg`: - element group size (EGS) is 4 - data independent execution timing -- `vl`/`vstart` must be multiples of EGS=4multiple constraints +- `vl`/`vstart` must be multiples of EGS=4 -All of these instructions work on 128-bit element groups comprised of four 32-bit elements. +All of these instructions work on 128-bit element groups comprised of four 32-bit elements, in element group parlance `EGS=4`, `EGW=128` and the instructions are only defined for `SEW=32`. -To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing. +To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing. The number of element groups to be processed is `vl`/`EGS`. `vl` must be set to the number of `SEW=32` elements to be processed and @@ -29,8 +29,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`. 
|EGW |Mnemonic |Instruction -| 32 | 128 | vghsh.vs | <> -| 32 | 128 | vgmul.vs | <> +| 32 | 128 | `vghsh.vs` | <> +| 32 | 128 | `vgmul.vs` | <> |=== From 38d0834a6785f4641d613851b87801d011c79ce6 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Thu, 7 Mar 2024 20:56:49 -0800 Subject: [PATCH 26/26] Fixing typos / corrections / introducing Zvbc32e inst table --- doc/vector-extra/insns/vclmul-32e.adoc | 2 +- doc/vector-extra/insns/vclmulh-32e.adoc | 6 +++--- doc/vector-extra/insns/vghsh-vs.adoc | 12 ++++++------ doc/vector-extra/insns/vgmul-vs.adoc | 10 +++++----- doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 8 ++++++-- doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc | 5 ++--- doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc | 5 +++-- 7 files changed, 26 insertions(+), 22 deletions(-) diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc index 1c24c15d..7a47de1c 100644 --- a/doc/vector-extra/insns/vclmul-32e.adoc +++ b/doc/vector-extra/insns/vclmul-32e.adoc @@ -56,7 +56,7 @@ Arguments:: [NOTE] ==== -`vclmul` instruction was initially defined in `Zvbc` with only `SEW=64-bit` support, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`. +`vclmul` instruction was initially defined in `Zvbc` with only `SEW=64` support; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32` bits. 
==== Description:: diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc index 3800452d..e8fa6cbe 100644 --- a/doc/vector-extra/insns/vclmulh-32e.adoc +++ b/doc/vector-extra/insns/vclmulh-32e.adoc @@ -1,4 +1,4 @@ -[[insns-vclmulh, Vector Carry-less Multiply Return High Half]] +[[insns-vclmulh-32e, Vector Carry-less Multiply Return High Half]] = vclmulh.[vv,vx] Synopsis:: @@ -56,7 +56,7 @@ Arguments:: [NOTE] ==== -`vclmulh` instruction was initially defined in `Zvbc`, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`. +`vclmulh` instruction was initially defined in `Zvbc`; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32` bits. ==== Description:: @@ -96,4 +96,4 @@ function clmulh(x, y, width) = { -- Included in:: -<> +<>, Zvbc diff --git a/doc/vector-extra/insns/vghsh-vs.adoc b/doc/vector-extra/insns/vghsh-vs.adoc index e1bf1c7d..fcd9d533 100644 --- a/doc/vector-extra/insns/vghsh-vs.adoc +++ b/doc/vector-extra/insns/vghsh-vs.adoc @@ -39,17 +39,17 @@ Arguments:: |SEW |Definition -| Vd | input | 128 | 4 | 32 | Partial hash (Y~i~) -| Vs1 | input | 128 | 4 | 32 | Cipher text (X~i~) -| Vs2 | input | 128 | 4 | 32 | Hash Subkey (H) -| Vd | output | 128 | 4 | 32 | Partial-hash (Y~i+1~) +| `vd` | input | 128 | 4 | 32 | Partial hash (Y~i~) +| `vs1` | input | 128 | 4 | 32 | Cipher text (X~i~) +| `vs2` | input | 128 | 4 | 32 | Hash Subkey (H) +| `vd` | output | 128 | 4 | 32 | Partial hash (Y~i+1~) |=== Description:: A single "iteration" of the GHASH~H~ algorithm is performed. -The previous partial hashes are read as 4-element groups from 'vd', +The previous partial hashes are read as 4-element groups from `vd`, the cipher texts are read as 4-element groups from `vs1` and the hash subkeys are read from the scalar element group in `vs2`. The resulting partial hashes are writen as 4-element groups into `vd`. 
@@ -102,7 +102,7 @@ function clause execute (VGHSHVS(vs2, vs1, vd)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - // H is component to all element groups + // H is common to all element groups let helem = 0; let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey diff --git a/doc/vector-extra/insns/vgmul-vs.adoc b/doc/vector-extra/insns/vgmul-vs.adoc index 1192f334..622badd1 100644 --- a/doc/vector-extra/insns/vgmul-vs.adoc +++ b/doc/vector-extra/insns/vgmul-vs.adoc @@ -37,15 +37,15 @@ Arguments:: |SEW |Definition -| Vd | input | 128 | 4 | 32 | Multiplier -| Vs2 | input | 128 | 4 | 32 | Multiplicand -| Vd | output | 128 | 4 | 32 | Product +| `vd` | input | 128 | 4 | 32 | Multiplier +| `vs2` | input | 128 | 4 | 32 | Multiplicand +| `vd` | output | 128 | 4 | 32 | Product |=== Description:: A GHASH~H~ multiply is performed. -The multipliers are read as 4-element groups from 'vd', +The multipliers are read as 4-element groups from `vd`, the multiplicands subkeys are read from the scalar element group in `vs2`. The resulting products are written as 4-element groups into `vd`. @@ -98,7 +98,7 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { eg_len = (vl/EGS) eg_start = (vstart/EGS) - // H multiplicand is constant for all loop iterations + // H multiplicand is common for all loop iterations let helem = 0; let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc index 53da28cc..33fec430 100644 --- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -2,8 +2,8 @@ = RISC-V Cryptography Extensions Volume III: Additional Vector Instructions :description: The addtional vector cryptography extensions for the RISC-V ISA. 
:company: RISC-V.org -:revdate: 6 February 2024 -:revnumber: v0.0.4 +:revdate: 7 March 2024 +:revnumber: v0.0.5 :revremark: :url-riscv: http://riscv.org :doctype: book @@ -75,6 +75,7 @@ Contributors to this specification (in alphabetical order) include: + Eric Biggers, Ken Dockser, +Liana Koleva, Markku-Juhani O. Saarinen, Nicolas Brunie, Richard Newell @@ -102,6 +103,7 @@ Instruction Set Extension Specification. All the Additional Vector Crypto Extensions can be built on _any_ embedded (Zve*) or application ("V") base Vector Extension. +In particular, `Zvbc32e` allows `Zve32*` implementations to support vector carry-less multiplication. // See <> for more details on vector element groups and the drawbacks of // small `VLEN` values. @@ -157,6 +159,8 @@ bibliography::../riscv-crypto-spec.bib[ieee] [[Encodings]] == Encodings +include::./riscv-crypto-vector-extra-inst-table-zvbc32e.adoc[] + include::./riscv-crypto-vector-extra-inst-table.adoc[] diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc index 8bd81a2d..b1439419 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -1,10 +1,10 @@ [appendix] -[[crypto_vector_instructions]] +[[crypto_vector_instructions_Zvkgs]] === Additional Vector Cryptographic Instructions OP-P (0x77) Vector Crypto instructions, including `Zvkgs`, except `Zvbb` and `Zvbc`. -The new/modified encoding are in bold and underlined. +The new/modified encodings are in bold. // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] [cols="4,1,1,1,1,4,1,1,1,4,1,1,1"] @@ -20,7 +20,6 @@ The new/modified encoding are in bold and underlined. 
// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] [cols="6,1,1,1,1,6,1,1,6,6,1,1,1"] -// TODO to be updated with vghsh.vs and vgmul.vs encoding |=== 5+^| funct6 4+^| funct6 4+^| funct6 diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc index 8d057e6a..6a516729 100644 --- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc +++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc @@ -4,7 +4,8 @@ This document describes the proposed _additional_ _vector_ cryptography extensions for RISC-V. Those extensions extend the _vector_ cryptography extensions for RISC-V, -providing additional features not mandatory for a high performace implementation but which -can help further improve the efficiency some algorithms (e.g. CRC, AES-GCM). +providing additional features. +Those extensions aim at either enabling some use cases (e.g. carry-less multiply on 32-bit vector implementations) +or enabling more efficient implementations of some algorithms (e.g. CRC, AES-GCM). All instructions proposed here are based on the Vector registers.