diff --git a/doc/vector-extra/Makefile b/doc/vector-extra/Makefile new file mode 100644 index 00000000..3dd87daa --- /dev/null +++ b/doc/vector-extra/Makefile @@ -0,0 +1,71 @@ +# Makefile for RISC-V Doc Template +# +# This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 +# International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# +# SPDX-License-Identifier: CC-BY-SA-4.0 +# +# Description: +# +# This Makefile is designed to automate the process of building and packaging +# the Doc Template for RISC-V Extensions. + +DOCKER_RUN := docker run --rm -v $(CURDIR):/build -w /build \ +riscvintl/riscv-docs-base-container-image:latest +HEADER_SOURCE := riscv-crypto-spec-vector-extra.adoc +PDF_RESULT := riscv-crypto-spec-vector-extra.pdf +SPEC_COMMIT= git-commit.adoc +ASCIIDOCTOR_PDF := asciidoctor-pdf +OPTIONS := --trace \ + -a compress \ + -a mathematical-format=svg \ + -a pdf-fontsdir=resources/fonts \ + -a pdf-style=resources/themes/risc-v_spec-pdf.yml \ + -a toc \ + --failure-level=ERROR +REQUIRES := --require=asciidoctor-bibtex \ + --require=asciidoctor-diagram \ + --require=asciidoctor-mathematical + +.PHONY: all build clean build-container build-no-container cp_bib + +all: build + +# Copy the shared bibliography next to the document sources. +cp_bib: + @cp ../riscv-crypto-spec.bib ./ + +# Record the current branch name and commit hash (created only if the file does not exist yet). +$(SPEC_COMMIT): + @git rev-parse --abbrev-ref HEAD > ${@} + @echo "@" >> ${@} + @git log --pretty=format:'%H' -n 1 >> ${@} + +# Probe for Docker with POSIX redirection: `&>` is a bashism that /bin/sh parses as +# "run in background", which would make the check succeed even without Docker. +build: cp_bib $(SPEC_COMMIT) + @echo "Checking if Docker is available..." + @if command -v docker > /dev/null 2>&1 ; then \ + echo "Docker is available, building inside Docker container..."; \ + $(MAKE) build-container; \ + else \ + echo "Docker is not available, building without Docker..."; \ + $(MAKE) build-no-container; \ + fi + +build-container: + @echo "Starting build inside Docker container..." 
+ $(DOCKER_RUN) /bin/sh -c "$(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE)" + @echo "Build completed successfully inside Docker container." + +build-no-container: + @echo "Starting build..." + $(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE) + @echo "Build completed successfully." + +clean: + @echo "Cleaning up generated files..." + rm -f $(PDF_RESULT) + @echo "Cleanup completed." diff --git a/doc/vector-extra/images/risc-v_logo.png b/doc/vector-extra/images/risc-v_logo.png new file mode 100644 index 00000000..d754746e Binary files /dev/null and b/doc/vector-extra/images/risc-v_logo.png differ diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc new file mode 100644 index 00000000..7a47de1c --- /dev/null +++ b/doc/vector-extra/insns/vclmul-32e.adoc @@ -0,0 +1,104 @@ +[[insns-vclmul-32e, Vector Carry-less Multiply]] += vclmul.[vv,vx] + +Synopsis:: +Vector Carry-less Multiply by vector or scalar - returning low half of product. + +Mnemonic:: +vclmul.vv vd, vs2, vs1, vm + +vclmul.vx vd, vs2, rs1, vm + +Encoding (Vector-Vector):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001100'}, +]} +.... + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVX'}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001100'}, +]} +.... 
+Reserved Encodings:: +* `SEW` is any value other than 32 (`Zvbc32e` only) +* `SEW` is any value other than 64 (`Zvbc` only) +* `SEW` is any value other than 32 or 64 (`Zvbc` and `Zvbc32e`) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2"] +|=== +|Register +|Direction +|Definition + +| `vs1`/`rs1` | input | multiplier +| `vs2` | input | multiplicand +| `vd` | output | lower part of carry-less multiply +|=== + +[NOTE] +==== +The `vclmul` instruction was initially defined in `Zvbc` with only `SEW=64` support; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32`. +==== + +Description:: +Produces the low half of `2*SEW`-bit carry-less product. + +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +from integer register `rs1` (vector-scalar). The result is the least +significant SEW bits of the carry-less product. + +[NOTE] +==== +The 32-bit carryless multiply instructions can be used for implementing GCM in the absence of the `Zvkg` extension. +This applies in particular to implementations with `ELEN=32`, where `Zvkg` cannot be implemented. +They can also be used to speed up CRC evaluation. 
+==== + +Operation:: +[source,sail] +-- + + +function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = { + + foreach (i from vstart to vl-1) { + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i) + else zext_or_truncate_to_sew(X(vs1)); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmul(op1, op2, SEW); + set_velem(vd, i, product); + } + RETIRE_SUCCESS +} + +function clmul(x, y, width) = { + let result : bits(width) = zeros(); + foreach (i from 0 to (width - 1)) { + if y[i] == 1 then result = result ^ (x << i); + } + result +} +-- + +Included in:: +<<zvbc32e>> diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc new file mode 100644 index 00000000..e8fa6cbe --- /dev/null +++ b/doc/vector-extra/insns/vclmulh-32e.adoc @@ -0,0 +1,99 @@ +[[insns-vclmulh-32e, Vector Carry-less Multiply Return High Half]] += vclmulh.[vv,vx] + +Synopsis:: +Vector Carry-less Multiply by vector or scalar - returning high half of product. + +Mnemonic:: +vclmulh.vv vd, vs2, vs1, vm + +vclmulh.vx vd, vs2, rs1, vm + +Encoding (Vector-Vector):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001101'}, +]} +.... + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-V'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVX'}, +{bits: 5, name: 'rs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: 'vm'}, +{bits: 6, name: '001101'}, +]} +.... 
+Reserved Encodings:: +* `SEW` is any value other than 64 (`Zvbc` only) +* `SEW` is any value other than 32 (`Zvbc32e` only) +* `SEW` is any value other than 32 or 64 (`Zvbc32e` and `Zvbc`) + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2"] +|=== +|Register +|Direction +|Definition + +| `vs1`/`rs1` | input | multiplier +| `vs2` | input | multiplicand +| `vd` | output | upper part of carry-less multiply +|=== + +[NOTE] +==== +`vclmulh` instruction was initially defined in `Zvbc`, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32` bits. +==== + +Description:: +Produces the high half of `2*SEW`-bit carry-less product. + +Each SEW-bit element in the `vs2` vector register is carry-less multiplied by +either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value +from integer register `rs1` (vector-scalar). The result is the most +significant SEW bits of the carry-less product. + +// This instruction must always be implemented such that its execution latency does not depend +// on the data being operated upon. 
+ +Operation:: +[source,sail] +-- +function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = { + + foreach (i from vstart to vl-1) { + let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i) + else zext_or_truncate_to_sew(X(vs1)); + let op2 : bits (SEW) = get_velem(vs2, i); + let product : bits (SEW) = clmulh(op1, op2, SEW); + set_velem(vd, i, product); + } + RETIRE_SUCCESS +} + +function clmulh(x, y, width) = { + let result : bits(width) = 0; + foreach (i from 1 to (width - 1)) { + if y[i] == 1 then result = result ^ (x >> (width - i)); + } + result +} + +-- + +Included in:: +<<zvbc32e>>, Zvbc diff --git a/doc/vector-extra/insns/vghsh-vs.adoc b/doc/vector-extra/insns/vghsh-vs.adoc new file mode 100644 index 00000000..fcd9d533 --- /dev/null +++ b/doc/vector-extra/insns/vghsh-vs.adoc @@ -0,0 +1,136 @@ +[[insns-vghsh-vs, Vector-Scalar GHASH Add-Multiply]] += vghsh.vs + +Synopsis:: +Vector-Scalar Add-Multiply over GHASH Galois-Field + +Mnemonic:: +vghsh.vs vd, vs2, vs1 + + +// This might be the first instruction with 3 operands and .vs +// need to find an encoding +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: 'vs1'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '100011'}, +]} +.... + +Reserved Encodings:: +* `SEW` is any value other than 32 + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2,2,2,2"] +|=== +|Register +|Direction +|EGW +|EGS +|SEW +|Definition + +| `vd` | input | 128 | 4 | 32 | Partial hash (Y~i~) +| `vs1` | input | 128 | 4 | 32 | Cipher text (X~i~) +| `vs2` | input | 128 | 4 | 32 | Hash Subkey (H) +| `vd` | output | 128 | 4 | 32 | Partial-hash (Y~i+1~) +|=== + +Description:: +A single "iteration" of the GHASH~H~ algorithm is performed. 
+ + +The previous partial hashes are read as 4-element groups from `vd`, +the cipher texts are read as 4-element groups from `vs1` +and the hash subkeys are read from the scalar element group in `vs2`. +The resulting partial hashes are written as 4-element groups into `vd`. + + +// The following is copied from vghsh.vv and could be omitted +// (replaced with a link to the original specification) + +This instruction treats all of the input and output element groups as 128-bit polynomials and +performs operations over GF[2]. +It produces the next partial hash (Y~i+1~) by adding the current partial +hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^)) +this sum by the Hash Subkey (H). + +The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials +modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1). + +The operation can be compactly defined as +// Y~i+1~ = (Y~i~ · H) ^ X~i~ +Y~i+1~ = ((Y~i~ ^ X~i~) · H) + +The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ +for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of +byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) +residing at the lowest memory address. Since the bits in the bytes are reversed, +this instruction internally performs bit swaps within bytes to put the bits in the standard ordering +(e.g., 7,6,5,4,3,2,1,0). + +This instruction must always be implemented such that its execution latency does not depend +on the data being operated upon. + +[NOTE] +==== +We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent +with the NIST specification. These reversals are inexpensive to implement as they unconditionally +swap bit positions and therefore do not require any logic. 
+==== + + +Operation:: +[source,pseudocode] +-- +function clause execute (VGHSHVS(vs2, vs1, vd)) = { + // operands are input with bits reversed in each byte + if(LMUL*VLEN < EGW) then { + handle_illegal(); // illegal instruction exception + RETIRE_FAIL + } else { + + eg_len = (vl/EGS) + eg_start = (vstart/EGS) + + // H is common to all element groups + let helem = 0; + + foreach (i from eg_start to eg_len-1) { + let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey; re-read for every group because the loop below shifts H in place + let Y = get_velem(vd,EGW=128,i); // current partial-hash + let X = get_velem(vs1,EGW=128,i); // block cipher output + + let Z : bits(128) = 0; + + let S = brev8(Y ^ X); + + for (int bit = 0; bit < 128; bit++) { + if bit_to_bool(S[bit]) + Z ^= H + + bool reduce = bit_to_bool(H[127]); + H = H << 1; // left shift H by 1 + if (reduce) + H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + + let result = brev8(Z); // bit reverse bytes to get back to GCM standard ordering + set_velem(vd, EGW=128, i, result); + } + RETIRE_SUCCESS + } +} +-- + +Included in:: +<<zvkgs>> diff --git a/doc/vector-extra/insns/vgmul-vs.adoc b/doc/vector-extra/insns/vgmul-vs.adoc new file mode 100644 index 00000000..622badd1 --- /dev/null +++ b/doc/vector-extra/insns/vgmul-vs.adoc @@ -0,0 +1,129 @@ +[[insns-vgmul-vs, Vector GHASH Multiply]] += vgmul.vs + +Synopsis:: +Vector-Scalar Multiply over GHASH Galois-Field + +Mnemonic:: +vgmul.vs vd, vs2 + + +Encoding (Vector-Scalar):: +[wavedrom, , svg] +.... +{reg:[ +{bits: 7, name: 'OP-P'}, +{bits: 5, name: 'vd'}, +{bits: 3, name: 'OPMVV'}, +{bits: 5, name: '10001'}, +{bits: 5, name: 'vs2'}, +{bits: 1, name: '1'}, +{bits: 6, name: '101001'}, +]} +.... 
+ +Reserved Encodings:: +* `SEW` is any value other than 32 + +Arguments:: + +[%autowidth] +[%header,cols="4,2,2,2,2,2"] +|=== +|Register +|Direction +|EGW +|EGS +|SEW +|Definition + +| `vd` | input | 128 | 4 | 32 | Multiplier +| `vs2` | input | 128 | 4 | 32 | Multiplicand +| `vd` | output | 128 | 4 | 32 | Product +|=== + +Description:: +A GHASH~H~ multiply is performed. + +The multipliers are read as 4-element groups from `vd`, +and the multiplicand is read from the scalar element group in `vs2`. +The resulting products are written as 4-element groups into `vd`. + +This instruction treats all of the inputs and outputs as 128-bit polynomials and +performs operations over GF[2]. +It produces the product over GF(2^128^) of the two 128-bit inputs. + +The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials +modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1). + +The NIST specification (see <>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~ +for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of +byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7) +residing at the lowest memory address. Since the bits in the bytes are reversed, +this instruction internally performs bit swaps within bytes to put the bits in the standard ordering +(e.g., 7,6,5,4,3,2,1,0). + +This instruction must always be implemented such that its execution latency does not depend +on the data being operated upon. + +[NOTE] +==== +We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent +with the NIST specification. These reversals are inexpensive to implement as they unconditionally +swap bit positions and therefore do not require any logic. 
+==== + + +[NOTE] +==== +Similarly to how the instruction `vgmul.vv` is identical to `vghsh.vv` with the value +of vs1 register being 0, the instruction `vgmul.vs` is identical to `vghsh.vs` with the value of vs1 being 0. +This instruction is often used in GHASH code. In some cases it is followed +by an XOR to perform a multiply-add. Implementations may choose to fuse these +two instructions to improve performance on GHASH code that +doesn't use the add-multiply form of the `vghsh.vv` instruction. + +==== + + +Operation:: +[source,pseudocode] +-- +function clause execute (VGMUL(vs2, vs1, vd, suffix)) = { + // operands are input with bits reversed in each byte + if(LMUL*VLEN < EGW) then { + handle_illegal(); // illegal instruction exception + RETIRE_FAIL + } else { + + eg_len = (vl/EGS) + eg_start = (vstart/EGS) + // H multiplicand is common for all loop iterations + let helem = 0; + + foreach (i from eg_start to eg_len-1) { + let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand; re-read for every group because the loop below shifts H in place + let Y = brev8(get_velem(vd,EGW=128,i)); // Multiplier + let Z : bits(128) = 0; + + for (int bit = 0; bit < 128; bit++) { + if bit_to_bool(Y[bit]) + Z ^= H + + bool reduce = bit_to_bool(H[127]); + H = H << 1; // left shift H by 1 + if (reduce) + H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + + + let result = brev8(Z); + set_velem(vd, EGW=128, i, result); + } + RETIRE_SUCCESS + } +} +-- + +Included in:: +<<zvkgs>> diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc new file mode 100644 index 00000000..33fec430 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc @@ -0,0 +1,166 @@ +[[riscv-doc-template]] += RISC-V Cryptography Extensions Volume III: Additional Vector Instructions +:description: The additional vector cryptography extensions for the RISC-V ISA. 
+:company: RISC-V.org +:revdate: March 7th 2024 +:revnumber: v0.0.5 +:revremark: +:url-riscv: http://riscv.org +:doctype: book +//:doctype: report +:preface-title: Preamble +:colophon: +:appendix-caption: Appendix +:imagesdir: images +:title-logo-image: image:risc-v_logo.png[pdfwidth=3.25in,align=center] +//:page-background-image: image:draft.svg[opacity=20%] +//:title-page-background-image: none +//:back-cover-image: image:circuit.png[opacity=25%] +// Settings: +:experimental: +:reproducible: +// needs to be changed? bug discussion started +:WaveDromEditorApp: wavedrom-cli +:imagesoutdir: images +:icons: font +:lang: en +:listing-caption: Listing +:sectnums: +:toc: left +:toclevels: 4 +:source-highlighter: pygments +ifdef::backend-pdf[] +:source-highlighter: coderay +endif::[] +:data-uri: +:hide-uri-scheme: +:stem: latexmath +:footnote: +:xrefstyle: short +:bibtex-file: riscv-crypto-spec.bib +:bibtex-order: alphabetical +:bibtex-style: ieee + +//:This is the preamble. + +[colophon] += Colophon + +This document describes additional Vector Cryptography extensions to the +RISC-V Instruction Set Architecture. + +This document is a _Discussion Document_. +Assume everything can change. +This document is not complete yet and was created only for the purpose of conversation outside of the document. +For more information, see link:http://riscv.org/spec-state[here]. + +[NOTE] +.Copyright and licensure: +This work is licensed under a +link:http://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.0 International License] + +[NOTE] +.Document Version Information: +==== +include::git-commit.adoc[] + +See link:https://github.com/riscv/riscv-crypto/doc/vector-extra[github.com/riscv/riscv-crypto/doc/vector-extra] +for more information. +==== + +[acknowledgments] +== Acknowledgments + +Contributors to this specification (in alphabetical order) +include: + +Eric Biggers, +Ken Dockser, +Liana Koleva, +Markku-Juhani O.
 Saarinen, +Nicolas Brunie, +Richard Newell + +We are all very grateful to the many other people who have +helped to improve this specification through their comments, reviews, +feedback and questions. + +// ------------------------------------------------------------ + +include::riscv-crypto-vector-extra-introduction.adoc[] + +// ------------------------------------------------------------ + +<<< +// ------------------------------------------------------------ + + +[[crypto_vector_extensions]] +== Extensions Overview + +This section introduces all of the extensions in the Additional Vector Cryptography +Instruction Set Extension Specification. + + +All the Additional Vector Crypto Extensions can be built +on _any_ embedded (Zve*) or application ("V") base Vector Extension. +In particular `Zvbc32e` allows `Zve32*` implementations to support vector carry-less multiplication. + +// See <> for more details on vector element groups and the drawbacks of +// small `VLEN` values. + + +As the instructions defined in this specification might be used to implement cryptographic primitives, +they may be implemented with data-independent execution latencies as +defined in the +link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification]. + +If `Zvkt` is implemented, all the instructions from `Zvbc32e` (`vclmul[h].[vv,vx]`) +shall be executed with data-independent execution latency. + +Whether `Zvkt` is implemented or not, all instructions from `Zvkgs` (`vgmul.vs`, `vghsh.vs`) +shall be executed with data-independent execution latency. + + +Detection of individual cryptography extensions uses the +unified software-based RISC-V discovery method. + +[NOTE] +==== +At the time of writing, these discovery mechanisms are still a work in +progress. 
+==== + +include::./riscv-crypto-vector-extra-zvbc32e.adoc[] +<<< +include::./riscv-crypto-vector-extra-zvkgs.adoc[] +<<< + + + +// ------------------------------------------------------------ + +[[crypto_vector_extra_insns, reftext="Additional Vector Cryptography Instructions"]] +== Instructions + + +include::insns/vclmul-32e.adoc[leveloffset=+2] +<<< +include::insns/vclmulh-32e.adoc[leveloffset=+2] +<<< +include::insns/vghsh-vs.adoc[leveloffset=+2] +<<< +include::insns/vgmul-vs.adoc[leveloffset=+2] +<<< + +[[bibliography]] +== Bibliography + +bibliography::../riscv-crypto-spec.bib[ieee] + +[[Encodings]] +== Encodings +include::./riscv-crypto-vector-extra-inst-table-zvbc32e.adoc[] + +include::./riscv-crypto-vector-extra-inst-table.adoc[] + + diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc new file mode 100644 index 00000000..b1439419 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc @@ -0,0 +1,59 @@ +[appendix] +[[crypto_vector_instructions_Zvkgs]] +=== Additional Vector Cryptographic Instructions + +OP-P (0x77) +Vector Crypto instructions, including `Zvkgs`, except `Zvbb` and `Zvbc`. +The new/modified encodings are in bold. 
+ +// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] +[cols="4,1,1,1,1,4,1,1,1,4,1,1,1"] +|=== +5+^|Integer 4+^|Integer 4+^| FP + +| funct3 | | | | | funct3 | | | | funct3 | | | +| OPIVV |V| | | | OPMVV |V| | | OPFVV |V| | +| OPIVX | |X| | | OPMVX | |X| | OPFVF | |F| +| OPIVI | | |I| | | | | | | | | +|=== + +// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"] +[cols="6,1,1,1,1,6,1,1,6,6,1,1,1"] + +|=== +5+^| funct6 4+^| funct6 4+^| funct6 + +|100000 | | | | | 100000 |V| | vsm3me | 100000 | | | +| 100001 | | | | | 100001 |V| | vsm4k.vi | 100001 | | | +| 100010 | | | | | 100010 |V| | vaesfk1.vi | 100010 | | | +| 100011 | | | | | 100011 |V| | __**vghsh.vs**__ | 100011 | | | +| 100100 | | | | | 100100 | | | | 100100 | | | +| 100101 | | | | | 100101 | | | | 100101 | | | +| 100110 | | | | | 100110 | | | | 100110 | | | +| 100111 | | | | | 100111 | | | | 100111 | | | +| | | | | | | | | | | | | +| 101000 | | | | | 101000 |V| | VAES.vv | 101000 | | | +| 101001 | | | | | 101001 |V| | *VAES.vs* | 101001 | | | +| 101010 | | | | | 101010 |V| | vaesfk2.vi | 101010 | | | +| 101011 | | | | | 101011 |V| | vsm3c.vi | 101011 | | | +| 101100 | | | | | 101100 |V| | vghsh | 101100 | | | +| 101101 | | | | | 101101 |V| | vsha2ms | 101101 | | | +| 101110 | | | | | 101110 |V| | vsha2ch | 101110 | | | +| 101111 | | | | | 101111 |V| | vsha2cl | 101111 | | | +|=== + +<<< + +.VAES.vv and VAES.vs encoding space +[cols="2,14"] +|=== +|vs1| + +| 00000 | vaesdm +| 00001 | vaesdf +| 00010 | vaesem +| 00011 | vaesef +| 00111 | vaesz +| 10000 | vsm4r +| 10001 | __**vgmul**__ +|=== diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc new file mode 100644 index 00000000..6a516729 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc @@ -0,0 +1,11 @@ +[[crypto_vector_introduction]] +== Introduction + +This document describes the proposed _additional_ _vector_ cryptography +extensions for RISC-V. 
+These extensions extend the _vector_ cryptography extensions for RISC-V, +providing additional features. +These extensions aim at either enabling some use cases (e.g. carry-less multiply on 32-bit vector implementations) +or enabling more efficient implementations of some algorithms (e.g. CRC, AES-GCM). +All instructions proposed here are based on the Vector registers. + diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc new file mode 100644 index 00000000..9cf42177 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc @@ -0,0 +1,23 @@ +[[zvbc32e,Zvbc32e]] +=== `Zvbc32e` - Vector Carryless Multiplication + +General purpose carryless multiplication instructions which are commonly used in cryptography +and hashing (e.g., Elliptic curve cryptography, GHASH, CRC). + +These instructions are only defined for `SEW`=32. +Zvbc32e can be supported when `ELEN >=32`. + + +Note:: The extension `Zvbc32e` is independent from `Zvbc` which defines the same instructions for `SEW=64`. + When `ELEN>=64` both extensions can be combined to have `vclmul.v[vx]` and `vclmulh.v[vx]` defined for both `SEW=32` and `SEW=64`. + +[%autowidth] +[%header,cols="^2,4"] +|=== +|Mnemonic +|Instruction +| `vclmul.[vv,vx]` | <<insns-vclmul-32e>> +| `vclmulh.[vv,vx]` | <<insns-vclmulh-32e>> + +|=== + diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc new file mode 100644 index 00000000..99155dc5 --- /dev/null +++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc @@ -0,0 +1,36 @@ +[[zvkgs,Zvkgs]] +=== `Zvkgs` - Vector-Scalar GCM/GMAC + +Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and +Galois Message Authentication Code (GMAC). + +`Zvkgs` depends on `Zvkg`. It extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`. 
+ +The instructions inherit the constraints defined in `Zvkg`: + +- element group size (EGS) is 4 +- data independent execution timing +- `vl`/`vstart` must be multiples of EGS=4 + +All of these instructions work on 128-bit element groups comprised of four 32-bit elements; in element group parlance, `EGS=4` and `EGW=128`, and the instructions are only defined for `SEW=32`. + +To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing. + +The number of element groups to be processed is `vl`/`EGS`. +`vl` must be set to the number of `SEW=32` elements to be processed and +therefore must be a multiple of `EGS=4`. + +Likewise, `vstart` must be a multiple of `EGS=4`. + +[%autowidth] +[%header,cols="^2,4,4,4"] +|=== + +|SEW +|EGW +|Mnemonic +|Instruction +| 32 | 128 | `vghsh.vs` | <<insns-vghsh-vs>> +| 32 | 128 | `vgmul.vs` | <<insns-vgmul-vs>> + +|=== +