From 4a59d4ba083a1ae4a5d24c6dff726f2508d9a245 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 14 Aug 2023 02:39:20 -0700
Subject: [PATCH 01/26] [Zv fast track] prototyping vclmul* changes

---
 doc/vector/insns/vclmul.adoc  | 21 +++++++++------------
 doc/vector/insns/vclmulh.adoc | 19 ++++++++++---------
 2 files changed, 19 insertions(+), 21 deletions(-)
diff --git a/doc/vector/insns/vclmul.adoc b/doc/vector/insns/vclmul.adoc
index ad941888..499d7562 100644
--- a/doc/vector/insns/vclmul.adoc
+++ b/doc/vector/insns/vclmul.adoc
@@ -36,7 +36,8 @@ Encoding (Vector-Scalar)::
 ]}
 ....
 Reserved Encodings::
-* `SEW` is any value other than 64
+* `SEW` is any value other than 64 (`Zvbc`)
+* `SEW` is any value other than 32 or 64 (`Zvbcb`)
 
 Arguments::
 
@@ -55,20 +56,16 @@ Arguments::
 Description::
 Produces the low half of 128-bit carry-less product.
 
-Each 64-bit element in the `vs2` vector register is carry-less multiplied by 
-either each 64-bit element in `vs1` (vector-vector), or the 64-bit value
+Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
+either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
 from integer register `rs1` (vector-scalar). The result is the least
-significant 64 bits of the carry-less product.
+significant SEW bits of the carry-less product.
 
 [NOTE]
 ====
 The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
 We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the
 instructions in the `zvkg` extension and can have utility in other areas.
-Likewise, we treat other SEW values as reserved so as not to preclude
-future extensions from using this opcode with different element widths.
-For example, a future extension might define an `SEW`=32 version of this instruction to enable `Zve32*` implementations to have
-vector carryless multiplication instructions.
 ====
 
 Operation::
@@ -79,10 +76,10 @@ Operation::
 function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = {
 
   foreach (i from vstart to vl-1) {
-    let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i)
+    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i)
                           else zext_or_truncate_to_sew(X(vs1));
-    let op2 : bits (64) = get_velem(vs2,i);
-    let product : bits (64) = clmul(op1,op2,SEW);
+    let op2 : bits (SEW) = get_velem(vs2, i);
+    let product : bits (SEW) = clmul(op1, op2, SEW);
     set_velem(vd, i, product);
   }
   RETIRE_SUCCESS
@@ -98,4 +95,4 @@ function clmul(x, y, width) = {
 --
 
 Included in::
-<<zvbc>>, <<zvknc>>, <<zvksc>>
+<<zvbc>>, <<zvknc>>, <<zvksc>>, <<zvbcb>>
diff --git a/doc/vector/insns/vclmulh.adoc b/doc/vector/insns/vclmulh.adoc
index 44f125ce..b5c0acb7 100644
--- a/doc/vector/insns/vclmulh.adoc
+++ b/doc/vector/insns/vclmulh.adoc
@@ -36,7 +36,8 @@ Encoding (Vector-Scalar)::
 ]}
 ....
 Reserved Encodings::
-* `SEW` is any value other than 64
+* `SEW` is any value other than 64 (`Zvbc`)
+* `SEW` is any value other than 32 or 64 (`Zvbcb`)
 
 Arguments::
 
@@ -52,13 +53,13 @@ Arguments::
 | Vd  | output | carry-less product high
 |===
 
-Description:: 
+Description::
 Produces the high half of 128-bit carry-less product.
 
-Each 64-bit element in the `vs2` vector register is carry-less multiplied by 
-either each 64-bit element in `vs1` (vector-vector), or the 64-bit value
+Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
+either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
 from integer register `rs1` (vector-scalar). The result is the most
-significant 64 bits of the carry-less product.
+significant SEW bits of the carry-less product.
 
 // This instruction must always be implemented such that its execution latency does not depend
 // on the data being operated upon.
@@ -69,10 +70,10 @@ Operation::
 function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = {
 
   foreach (i from vstart to vl-1) {
-    let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i)
+    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i)
                           else zext_or_truncate_to_sew(X(vs1));
-    let op2 : bits (64) = get_velem(vs2, i);
-    let product : bits (64) = clmulh(op1, op2, SEW);
+    let op2 : bits (SEW) = get_velem(vs2, i);
+    let product : bits (SEW) = clmulh(op1, op2, SEW);
     set_velem(vd, i, product);
   }
   RETIRE_SUCCESS
@@ -89,4 +90,4 @@ function clmulh(x, y, width) = {
 --
 
 Included in::
-<<zvbc>>, <<zvknc>>, <<zvksc>>
+<<zvbc>>, <<zvbcb>>, <<zvknc>>, <<zvksc>>

From a1bfcfce97ab683584bb9da4dc672be511c4a5cc Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 14 Aug 2023 02:39:31 -0700
Subject: [PATCH 02/26] [Zv fast track] prototyping vg* changes

---
 doc/vector/insns/vghsh.adoc | 41 +++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc
index cd02b0e6..bb9c97a4 100644
--- a/doc/vector/insns/vghsh.adoc
+++ b/doc/vector/insns/vghsh.adoc
@@ -1,13 +1,14 @@
 [[insns-vghsh, Vector GHASH Add-Multiply]]
-= vghsh.vv
+= vghsh.[vv,vs]
 
 Synopsis::
 Vector Add-Multiply over GHASH Galois-Field
 
 Mnemonic::
-vghsh.vv vd, vs2, vs1
+vghsh.vv vd, vs2, vs1 +
+vghsh.vs vd, vs2, vs1
 
-Encoding::
+Encoding (Vector-Vector)::
 [wavedrom, , svg]
 ....
 {reg:[
@@ -20,8 +21,25 @@ Encoding::
 {bits: 6, name: '101100'},
 ]}
 ....
+
+// This might be the first instruction with 3 operands and .vs
+// need to find an encoding
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: 'vs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101100'},
+]}
+....
+
 Reserved Encodings::
-* `SEW` is any value other than 32 
+* `SEW` is any value other than 32
 
 Arguments::
 
@@ -41,10 +59,10 @@ Arguments::
 | Vd  | output | 128  | 4 | 32 | Partial-hash (Y~i+1~)
 |===
 
-Description:: 
+Description::
 A single "iteration" of the GHASH~H~ algorithm is performed.
 
-This instruction treats all of the inputs and outputs as 128-bit polynomials and 
+This instruction treats all of the inputs and outputs as 128-bit polynomials and
 performs operations over GF[2].
 It produces the next partial hash (Y~i+1~) by adding the current partial
 hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^))
@@ -60,7 +78,7 @@ Y~i+1~ = ((Y~i~ ^ X~i~) &#183; H)
 The NIST specification (see <<zvkg>>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~
 for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of
 byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7)
-residing at the lowest memory address. Since the bits in the bytes are reversed, 
+residing at the lowest memory address. Since the bits in the bytes are reversed,
 This instruction internally performs bit swaps within bytes to put the bits in the standard ordering
 (e.g., 7,6,5,4,3,2,1,0).
 
@@ -78,7 +96,7 @@ swap bit positions and therefore do not require any logic.
 ====
 Since the same hash subkey `H` will typically be used repeatedly on a given message,
 a future extension might define a vector-scalar version of this instruction where
-`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. 
+`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1.
 ====
 
 Operation::
@@ -93,11 +111,12 @@ function clause execute (VGHSH(vs2, vs1, vd)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
-  
+
   foreach (i from eg_start to eg_len-1) {
+    let helem = if suffix == "vv" then i else 0;
     let Y = (get_velem(vd,EGW=128,i));  // current partial-hash
     let X = get_velem(vs1,EGW=128,i);  // block cipher output
-    let H = brev8(get_velem(vs2,EGW=128,i)); // Hash subkey
+    let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
 
     let Z : bits(128) = 0;
 
@@ -122,4 +141,4 @@ function clause execute (VGHSH(vs2, vs1, vd)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkng>>, <<zvksg>>
+<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>

From bc7f52746c27030133b47fafe3247a7821ac3a45 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 14 Aug 2023 04:55:27 -0700
Subject: [PATCH 03/26] Completing vghsh.vs/vgmul.vs descriptions

---
 doc/vector/insns/vghsh.adoc | 21 ++++++++-------
 doc/vector/insns/vgmul.adoc | 53 +++++++++++++++++++++++++------------
 2 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc
index bb9c97a4..b487b11d 100644
--- a/doc/vector/insns/vghsh.adoc
+++ b/doc/vector/insns/vghsh.adoc
@@ -6,7 +6,7 @@ Vector Add-Multiply over GHASH Galois-Field
 
 Mnemonic::
 vghsh.vv vd, vs2, vs1 +
-vghsh.vs vd, vs2, vs1
+vghsh.vs vd, vs2, vs1
 
 Encoding (Vector-Vector)::
 [wavedrom, , svg]
@@ -40,6 +40,7 @@ Encoding (Vector-Scalar)::
 
 Reserved Encodings::
 * `SEW` is any value other than 32
+* `vghsh.vs` encoding (except if `Zvkgb` is enabled)
 
 Arguments::
 
@@ -62,7 +63,15 @@ Arguments::
 Description::
 A single "iteration" of the GHASH~H~ algorithm is performed.
 
-This instruction treats all of the inputs and outputs as 128-bit polynomials and
+
+The previous partial hashes are read as 4-element groups from `vd`,
+the cipher texts are read as 4-element groups from `vs1`,
+and the hash subkeys are read from either the corresponding 4-element group
+in `vs2` (vector-vector form) or the scalar element group in `vs2`
+(vector-scalar form, `Zvkgb` only). The resulting partial hashes are written as 4-element groups into `vd`.
+
+
+This instruction treats all of the input and output element groups as 128-bit polynomials and
 performs operations over GF[2].
 It produces the next partial hash (Y~i+1~) by adding the current partial
 hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^))
@@ -92,17 +101,11 @@ with the NIST specification. These reversals are inexpensive to implement as the
 swap bit positions and therefore do not require any logic.
 ====
 
-[NOTE]
-====
-Since the same hash subkey `H` will typically be used repeatedly on a given message,
-a future extension might define a vector-scalar version of this instruction where
-`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1.
-====
 
 Operation::
 [source,pseudocode]
 --
-function clause execute (VGHSH(vs2, vs1, vd)) = {
+function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
   // operands are input with bits reversed in each byte
   if(LMUL*VLEN < EGW)  then {
     handle_illegal();  // illegal instruction exception
diff --git a/doc/vector/insns/vgmul.adoc b/doc/vector/insns/vgmul.adoc
index 0008132c..ca858010 100644
--- a/doc/vector/insns/vgmul.adoc
+++ b/doc/vector/insns/vgmul.adoc
@@ -7,7 +7,7 @@ Vector Multiply over GHASH Galois-Field
 Mnemonic::
 vgmul.vv vd, vs2
 
-Encoding::
+Encoding (Vector-Vector)::
 [wavedrom, , svg]
 ....
 {reg:[
@@ -20,8 +20,25 @@ Encoding::
 {bits: 6, name: '101000'},
 ]}
 ....
+
+
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: '10001'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101001'},
+]}
+....
+
 Reserved Encodings::
-* `SEW` is any value other than 32 
+* `SEW` is any value other than 32
+* `vgmul.vs` encoding (except if `Zvkgb` is enabled)
 
 Arguments::
 
@@ -40,9 +57,14 @@ Arguments::
 | Vd  | output | 128  | 4 | 32 | Product
 |===
 
-Description:: 
+Description::
 A GHASH~H~ multiply is performed.
 
+The multipliers are read as 4-element groups from `vd`, and
+the multiplicands are read from either the corresponding 4-element group
+in `vs2` (vector-vector form) or the scalar element group in `vs2`
+(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`.
+
 This instruction treats all of the inputs and outputs as 128-bit polynomials and 
 performs operations over GF[2].
 It produces the product over GF(2^128^) of the two 128-bit inputs.
@@ -67,27 +89,23 @@ with the NIST specification. These reversals are inexpensive to implement as the
 swap bit positions and therefore do not require any logic.
 ====
 
-[NOTE]
-====
-Since the same multiplicand will typically be used repeatedly on a given message,
-a future extension might define a vector-scalar version of this instruction where
-`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. 
-====
 
 [NOTE]
 ====
-This instruction is identical to `vghsh.vv` with vs1=0.
+The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0.
 This instruction is often used in GHASH code. In some cases it is followed
 by an XOR to perform a multiply-add. Implementations may choose to fuse these
-two instructions to improve performance on GHASH code that 
-doesn't use the add-multiply form of the `vghsh.vv` instruction. 
+two instructions to improve performance on GHASH code that
+doesn't use the add-multiply form of the `vghsh.vv` instruction.
+
+Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0.
 ====
 
 
 Operation::
 [source,pseudocode]
 --
-function clause execute (VGMUL(vs2, vs1, vd)) = {
+function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
   // operands are input with bits reversed in each byte
   if(LMUL*VLEN < EGW)  then {
     handle_illegal();  // illegal instruction exception
@@ -96,10 +114,11 @@ function clause execute (VGMUL(vs2, vs1, vd)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
-  
+
   foreach (i from eg_start to eg_len-1) {
+    let helem = if suffix == "vv" then i else 0;
     let Y = brev8(get_velem(vd,EGW=128,i));  // Multiplier
-    let H = brev8(get_velem(vs2,EGW=128,i)); // Multiplicand
+    let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
     let Z : bits(128) = 0;
 
     for (int bit = 0; bit < 128; bit++) {
@@ -113,7 +132,7 @@ function clause execute (VGMUL(vs2, vs1, vd)) = {
     }
 
 
-    let result = brev8(Z); 
+    let result = brev8(Z);
     set_velem(vd, EGW=128, i, result);
   }
   RETIRE_SUCCESS
@@ -122,4 +141,4 @@ function clause execute (VGMUL(vs2, vs1, vd)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkng>>, <<zvksg>>
+<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>

From 6b8eadb83afc7a36e83c6ac462bade63b024918e Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Sun, 27 Aug 2023 10:35:05 -0700
Subject: [PATCH 04/26] adding directory with vector-crypto extra skeleton

---
 doc/vector-extra/Makefile                     |  67 +++++
 .../riscv-crypto-spec-vector-extra.adoc       | 252 ++++++++++++++++++
 .../riscv-crypto-vector-zvbc32e.adoc          |  18 ++
 .../riscv-crypto-vector-zvkgs.adoc            |  41 +++
 doc/vector-extra/vghsh-vs.adoc                | 147 ++++++++++
 doc/vector-extra/vgmul-vs.adoc                | 144 ++++++++++
 6 files changed, 669 insertions(+)
 create mode 100644 doc/vector-extra/Makefile
 create mode 100644 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
 create mode 100644 doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc
 create mode 100644 doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
 create mode 100644 doc/vector-extra/vghsh-vs.adoc
 create mode 100644 doc/vector-extra/vgmul-vs.adoc

diff --git a/doc/vector-extra/Makefile b/doc/vector-extra/Makefile
new file mode 100644
index 00000000..3dd87daa
--- /dev/null
+++ b/doc/vector-extra/Makefile
@@ -0,0 +1,67 @@
+# Makefile for RISC-V Doc Template
+#
+# This work is licensed under the Creative Commons Attribution-ShareAlike 4.0
+# International License. To view a copy of this license, visit
+# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to
+# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+#
+# SPDX-License-Identifier: CC-BY-SA-4.0
+#
+# Description:
+#
+# Builds the vector-crypto "extra" specification PDF with asciidoctor-pdf,
+# inside the RISC-V docs container when Docker is available, else on the host.
+
+DOCKER_RUN := docker run --rm -v ${PWD}:/build -w /build \
+riscvintl/riscv-docs-base-container-image:latest
+HEADER_SOURCE := riscv-crypto-spec-vector-extra.adoc
+PDF_RESULT := riscv-crypto-spec-vector-extra.pdf
+SPEC_COMMIT := git-commit.adoc
+ASCIIDOCTOR_PDF := asciidoctor-pdf
+OPTIONS := --trace \
+           -a compress \
+           -a mathematical-format=svg \
+           -a pdf-fontsdir=resources/fonts \
+           -a pdf-style=resources/themes/risc-v_spec-pdf.yml \
+           -a toc \
+           --failure-level=ERROR
+REQUIRES := --require=asciidoctor-bibtex \
+            --require=asciidoctor-diagram \
+            --require=asciidoctor-mathematical
+
+.PHONY: all build clean build-container build-no-container cp_bib
+
+all: build
+
+cp_bib:
+	@cp ../riscv-crypto-spec.bib ./
+
+$(SPEC_COMMIT):
+	@git rev-parse --abbrev-ref HEAD > ${@}
+	@echo "@" >> ${@}
+	@git log --pretty=format:'%H' -n 1 >> ${@}
+
+build: cp_bib $(SPEC_COMMIT)
+	@echo "Checking if Docker is available..."
+	@if command -v docker > /dev/null 2>&1 ; then \
+		echo "Docker is available, building inside Docker container..."; \
+		$(MAKE) build-container; \
+	else \
+		echo "Docker is not available, building without Docker..."; \
+		$(MAKE) build-no-container; \
+	fi
+
+build-container:
+	@echo "Starting build inside Docker container..."
+	$(DOCKER_RUN) /bin/sh -c "$(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE)"
+	@echo "Build completed successfully inside Docker container."
+
+build-no-container:
+	@echo "Starting build..."
+	$(ASCIIDOCTOR_PDF) $(OPTIONS) $(REQUIRES) --out-file=$(PDF_RESULT) $(HEADER_SOURCE)
+	@echo "Build completed successfully."
+
+clean:
+	@echo "Cleaning up generated files..."
+	rm -f $(PDF_RESULT)
+	@echo "Cleanup completed."
diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
new file mode 100644
index 00000000..768d7999
--- /dev/null
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -0,0 +1,252 @@
+[[riscv-doc-template]]
+= RISC-V Cryptography Extensions Volume II: Vector Instructions
+:description: The vector cryptography extensions for the RISC-V ISA.
+:company: RISC-V.org
+:revdate: 08 August 2023
+:revnumber: v1.0.0
+:revremark: RC2
+:url-riscv: http://riscv.org
+:doctype: book
+//:doctype: report
+:preface-title: Preamble
+:colophon:
+:appendix-caption: Appendix
+:imagesdir: images
+:title-logo-image: image:risc-v_logo.png[pdfwidth=3.25in,align=center]
+//:page-background-image: image:draft.svg[opacity=20%]
+//:title-page-background-image: none
+//:back-cover-image: image:circuit.png[opacity=25%]
+// Settings:
+:experimental:
+:reproducible:
+// needs to be changed? bug discussion started
+:WaveDromEditorApp: wavedrom-cli
+:imagesoutdir: images
+:icons: font
+:lang: en
+:listing-caption: Listing
+:sectnums:
+:toc: left
+:toclevels: 4
+:source-highlighter: pygments
+ifdef::backend-pdf[]
+:source-highlighter: coderay
+endif::[]
+:data-uri:
+:hide-uri-scheme:
+:stem: latexmath
+:footnote:
+:xrefstyle: short
+:bibtex-file: riscv-crypto-spec.bib
+:bibtex-order: alphabetical
+:bibtex-style: ieee
+
+//:This is the preamble.
+
+[colophon]
+= Colophon
+
+This document describes the Vector Cryptography extensions to the 
+RISC-V Instruction Set Architecture.
+
+This document is _frozen_.
+Change is extremely unlikely. A high threshold will be used, and a
+change will only occur because of some truly critical issue being
+identified during the public review cycle. Any other desired or needed
+changes can be the subject of a follow-on new extension.
+For more information, see link:http://riscv.org/spec-state[here].
+
+[NOTE]
+.Copyright and licensure:
+This work is licensed under a
+link:http://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.0 International License]
+
+[NOTE]
+.Document Version Information:
+====
+include::git-commit.adoc[]
+
+See link:https://github.com/riscv/riscv-crypto[github.com/riscv/riscv-crypto]
+for more information.
+====
+
+[acknowledgments]
+== Acknowledgments
+
+Contributors to this specification (in alphabetical order)
+include: +
+Alan Baum,
+Barna Ibrahim,
+Barry Spinney,
+Ben Marshall,
+Derek Atkins,
+link:mailto:kdockser@tenstorrent.com[Ken Dockser] (Editor),
+Markku-Juhani O. Saarinen,
+Nicolas Brunie, 
+Richard Newell
+
+We are all very grateful to the many other people who have
+helped to improve this specification through their comments, reviews,
+feedback and questions.
+
+// ------------------------------------------------------------
+
+include::riscv-crypto-vector-introduction.adoc[]
+include::riscv-crypto-vector-audience.adoc[]
+include::riscv-crypto-vector-sail-specifications.adoc[]
+include::riscv-crypto-vector-policies.adoc[]
+
+// ------------------------------------------------------------
+
+include::./riscv-crypto-vector-element-groups.adoc[]
+include::./riscv-crypto-vector-instruction-constraints.adoc[]
+include::./riscv-crypto-vector-scalar-instructions.adoc[]
+include::./riscv-crypto-vector-software-portability.adoc[]
+<<<
+    
+// ------------------------------------------------------------
+
+
+[[crypto_vector_extensions]]
+== Extensions Overview
+
+This section introduces all of the extensions in the Vector Cryptography
+Instruction Set Extension Specification.
+
+The <<zvknh,Zvknhb>> and <<zvbc>> Vector Crypto Extensions
+--and accordingly the composite extensions <<Zvkn>> and <<Zvks>>--
+require a Zve64x base,
+or application ("V") base Vector Extension.
+
+All of the other Vector Crypto Extensions can be built
+on _any_ embedded (Zve*) or application ("V") base Vector Extension.
+
+// See <<crypto-vector-element-groups>> for more details on vector element groups and the drawbacks of
+// small `VLEN` values.
+
+
+All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those
+in <<zvkned>>, <<zvknh,Zvknh[ab]>>, <<Zvkg>>, <<Zvksed>> and <<zvksh>> but _not_ <<zvbb>>,<<zvkb>>, or <<zvbc>>) shall
+be executed with data-independent execution latency as defined in the
+link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification].
+It is important to note that the Vector Crypto instructions are independent of the
+implementation of the `Zkt` extension and do not require that `Zkt` is implemented.
+
+This specification includes a <<Zvkt>> extension that, when implemented, requires certain vector instructions
+(including <<zvbb>>, <<zvkb>>, and <<zvbc>>) to be executed with data-independent execution latency.
+
+Detection of individual cryptography extensions uses the
+unified software-based RISC-V discovery method.
+
+[NOTE]
+====
+At the time of writing, these discovery mechanisms are still a work in
+progress.
+====
+
+include::./riscv-crypto-vector-zvbb.adoc[]
+<<<
+include::./riscv-crypto-vector-zvbc.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkb.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkg.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkned.adoc[]
+<<<
+include::./riscv-crypto-vector-zvknh.adoc[]
+<<< 
+include::./riscv-crypto-vector-zvksed.adoc[]
+<<<
+include::./riscv-crypto-vector-zvksh.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkn.adoc[]
+<<<
+include::./riscv-crypto-vector-zvknc.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkng.adoc[]
+<<<
+include::./riscv-crypto-vector-zvks.adoc[]
+<<<
+include::./riscv-crypto-vector-zvksc.adoc[]
+<<<
+include::./riscv-crypto-vector-zvksg.adoc[]
+<<<
+include::./riscv-crypto-vector-zvkt.adoc[]
+<<<
+
+
+
+// ------------------------------------------------------------
+
+[[crypto_vector_insns, reftext="Vector Cryptography Instructions"]]
+== Instructions
+
+
+include::insns/vaesdf.adoc[leveloffset=+2]
+<<<
+include::insns/vaesdm.adoc[leveloffset=+2]
+<<<   
+include::insns/vaesef.adoc[leveloffset=+2]
+<<<
+include::insns/vaesem.adoc[leveloffset=+2]
+<<<
+include::insns/vaeskf1.adoc[leveloffset=+2]
+<<<
+include::insns/vaeskf2.adoc[leveloffset=+2]
+<<<
+include::insns/vaesz.adoc[leveloffset=+2]
+<<<
+include::insns/vandn.adoc[leveloffset=+2]
+<<<
+include::insns/vbrev.adoc[leveloffset=+2]
+<<<
+include::insns/vbrev8.adoc[leveloffset=+2]
+<<<
+include::insns/vclmul.adoc[leveloffset=+2]
+<<<
+include::insns/vclmulh.adoc[leveloffset=+2]
+<<<
+include::insns/vclz.adoc[leveloffset=+2]
+<<<
+include::insns/vcpop.adoc[leveloffset=+2]
+<<<
+include::insns/vctz.adoc[leveloffset=+2]
+<<<
+include::insns/vghsh.adoc[leveloffset=+2]
+<<<
+include::insns/vgmul.adoc[leveloffset=+2]
+<<<
+include::insns/vrev8.adoc[leveloffset=+2]
+<<<
+include::insns/vrol.adoc[leveloffset=+2]
+<<<
+include::insns/vror.adoc[leveloffset=+2]
+<<<
+include::insns/vsha2c.adoc[leveloffset=+2]
+<<<
+include::insns/vsha2ms.adoc[leveloffset=+2]
+<<<
+include::insns/vsm3c.adoc[leveloffset=+2]
+<<<
+include::insns/vsm3me.adoc[leveloffset=+2]
+<<<
+include::insns/vsm4k.adoc[leveloffset=+2]
+<<<
+include::insns/vsm4r.adoc[leveloffset=+2]
+<<<
+include::insns/vwsll.adoc[leveloffset=+2]
+<<<
+
+[[bibliography]]
+== Bibliography
+
+bibliography::../riscv-crypto-spec.bib[ieee]
+
+[[Encodings]]
+== Encodings
+include::./riscv-crypto-vector-inst-table.adoc[]
+include::./riscv-crypto-vector-inst-table-zvbb-zvbc.adoc[]
+
+
+include::./riscv-crypto-vector-appx-sail.adoc[]
diff --git a/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc
new file mode 100644
index 00000000..7bd8e84e
--- /dev/null
+++ b/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc
@@ -0,0 +1,18 @@
+[[zvbc,Zvbc]]
+=== `Zvbc` - Vector Carryless Multiplication
+
+General purpose carryless multiplication instructions which are commonly used in cryptography
+and hashing (e.g., Elliptic curve cryptography, GHASH, CRC).
+
+These instructions are only defined for `SEW`=64.
+
+[%autowidth]
+[%header,cols="^2,4"]
+|===
+|Mnemonic
+|Instruction
+| vclmul.[vv,vx]     | <<insns-vclmul>>
+| vclmulh.[vv,vx]    | <<insns-vclmulh>>
+
+|===
+
diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
new file mode 100644
index 00000000..254e2ade
--- /dev/null
+++ b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
@@ -0,0 +1,41 @@
+[[zvkg,Zvkg]]
+=== `Zvkg` - Vector GCM/GMAC
+
+Instructions to enable the efficient implementation of GHASH~H~ which is used in Galois/Counter Mode (GCM) and
+Galois Message Authentication Code (GMAC).
+
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
+
+GHASH~H~ is defined in the
+// link:https://csrc.nist.gov/publications/detail/sp/800-38d/final[NIST Special Publication 800-38D]
+ "Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC"
+ cite:[nist:gcm]
+(NIST Specification).
+
+[NOTE]
+====
+GCM is used in conjunction with block ciphers (e.g., AES and SM4) to encrypt a message and
+provide authentication.
+GMAC is used to provide authentication of a message without encryption.
+====
+
+To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing.
+
+The number of element groups to be processed is `vl`/`EGS`.
+`vl` must be set to the number of `SEW=32` elements to be processed and
+therefore must be a multiple of `EGS=4`. +
+Likewise, `vstart` must be a multiple of `EGS=4`.
+
+[%autowidth]
+[%header,cols="^2,4,4,4"]
+|===
+
+|SEW
+|EGW
+|Mnemonic
+|Instruction
+| 32 | 128 | vghsh.vv | <<insns-vghsh>>
+| 32 | 128 | vgmul.vv | <<insns-vgmul>>
+
+|===
+
diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc
new file mode 100644
index 00000000..b487b11d
--- /dev/null
+++ b/doc/vector-extra/vghsh-vs.adoc
@@ -0,0 +1,147 @@
+[[insns-vghsh, Vector GHASH Add-Multiply]]
+= vghsh.[vv,vs]
+
+Synopsis::
+Vector Add-Multiply over GHASH Galois-Field
+
+Mnemonic::
+vghsh.vv vd, vs2, vs1 +
+vghsh.vs vd, vs2, vs1
+
+Encoding (Vector-Vector)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: 'vs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101100'},
+]}
+....
+
+// This might be the first instruction with 3 operands and .vs
+// need to find an encoding
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: 'vs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101100'},
+]}
+....
+
+Reserved Encodings::
+* `SEW` is any value other than 32
+* `vghsh.vs` encoding (except if `Zvkgb` is enabled)
+
+Arguments::
+
+[%autowidth]
+[%header,cols="4,2,2,2,2,2"]
+|===
+|Register
+|Direction
+|EGW
+|EGS
+|SEW
+|Definition
+
+| Vd  | input  | 128  | 4 | 32 | Partial hash (Y~i~)
+| Vs1 | input  | 128  | 4 | 32 | Cipher text (X~i~)
+| Vs2 | input  | 128  | 4 | 32 | Hash Subkey (H)
+| Vd  | output | 128  | 4 | 32 | Partial-hash (Y~i+1~)
+|===
+
+Description::
+A single "iteration" of the GHASH~H~ algorithm is performed.
+
+
+The previous partial hashes are read as 4-element groups from `vd`,
+the cipher texts are read as 4-element groups from `vs1`,
+and the hash subkeys are read from either the corresponding 4-element group
+in `vs2` (vector-vector form) or the scalar element group in `vs2`
+(vector-scalar form, `Zvkgb` only). The resulting partial hashes are written as 4-element groups into `vd`.
+
+
+This instruction treats all of the input and output element groups as 128-bit polynomials and
+performs operations over GF[2].
+It produces the next partial hash (Y~i+1~) by adding the current partial
+hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^))
+this sum by the Hash Subkey (H).
+
+The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials
+modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1).
+
+The operation can be compactly defined as
+// Y~i+1~ = (Y~i~ &#183; H) ^ X~i~
+Y~i+1~ = ((Y~i~ ^ X~i~) &#183; H)
+
+The NIST specification (see <<zvkg>>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~
+for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of
+byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7)
+residing at the lowest memory address. Since the bits in the bytes are reversed,
+This instruction internally performs bit swaps within bytes to put the bits in the standard ordering
+(e.g., 7,6,5,4,3,2,1,0).
+
+This instruction must always be implemented such that its execution latency does not depend
+on the data being operated upon.
+
+[NOTE]
+====
+We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent
+with the NIST specification. These reversals are inexpensive to implement as they unconditionally
+swap bit positions and therefore do not require any logic.
+====
+
+
+Operation::
+[source,pseudocode]
+--
+function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
+  // operands are input with bits reversed in each byte
+  if(LMUL*VLEN < EGW)  then {
+    handle_illegal();  // illegal instruction exception
+    RETIRE_FAIL
+  } else {
+
+  eg_len = (vl/EGS)
+  eg_start = (vstart/EGS)
+
+  foreach (i from eg_start to eg_len-1) {
+    let helem = if suffix == "vv" then i else 0;
+    let Y = (get_velem(vd,EGW=128,i));  // current partial-hash
+    let X = get_velem(vs1,EGW=128,i);  // block cipher output
+    let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
+
+    let Z : bits(128) = 0;
+
+    let S = brev8(Y ^ X);
+
+    for (int bit = 0; bit < 128; bit++) {
+      if bit_to_bool(S[bit])
+        Z ^= H
+
+      bool reduce = bit_to_bool(H[127]);
+      H = H << 1; // left shift H by 1
+      if (reduce)
+        H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
+    }
+
+    let result = brev8(Z); // bit reverse bytes to get back to GCM standard ordering
+    set_velem(vd, EGW=128, i, result);
+  }
+  RETIRE_SUCCESS
+ }
+}
+--
+
+Included in::
+<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>
diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/vgmul-vs.adoc
new file mode 100644
index 00000000..ca858010
--- /dev/null
+++ b/doc/vector-extra/vgmul-vs.adoc
@@ -0,0 +1,144 @@
+[[insns-vgmul, Vector GHASH Multiply]]
+= vgmul.vv
+
+Synopsis::
+Vector Multiply over GHASH Galois-Field
+
+Mnemonic::
+vgmul.vv vd, vs2
+
+Encoding (Vector-Vector)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: '10001'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101000'},
+]}
+....
+
+
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-P'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: '10001'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: '1'},
+{bits: 6, name: '101001'},
+]}
+....
+
+Reserved Encodings::
+* `SEW` is any value other than 32
+* `vgmul.vs` encoding (except if `Zvkgb` is enabled)
+
+Arguments::
+
+[%autowidth]
+[%header,cols="4,2,2,2,2,2"]
+|===
+|Register
+|Direction
+|EGW
+|EGS
+|SEW
+|Definition
+
+| Vd  | input  | 128  | 4 | 32 | Multiplier
+| Vs2 | input  | 128  | 4 | 32 | Multiplicand
+| Vd  | output | 128  | 4 | 32 | Product
+|===
+
+Description::
+A GHASH~H~ multiply is performed.
+
+The multipliers are read as 4-element groups from 'vd',
+ the multiplicands subkeys are read from either the corresponding 4-element group
+in `vs2` (vector-vector form) or the scalar element group in `vs2`
+(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`.
+
+This instruction treats all of the inputs and outputs as 128-bit polynomials and 
+performs operations over GF[2].
+It produces the product over GF(2^128^) of the two 128-bit inputs.
+
+The multiplication over GF(2^128^) is a carryless multiply of two 128-bit polynomials
+modulo GHASH's irreducible polynomial (x^128^ + x^7^ + x^2^ + x + 1).
+
+The NIST specification (see <<zvkg>>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~
+for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of
+byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7)
+residing at the lowest memory address. Since the bits in the bytes are reversed, 
+this instruction internally performs bit swaps within bytes to put the bits in the standard ordering
+(e.g., 7,6,5,4,3,2,1,0).
+
+This instruction must always be implemented such that its execution latency does not depend
+on the data being operated upon.
+
+[NOTE]
+====
+We are bit-reversing the bytes of inputs and outputs so that the intermediate values are consistent
+with the NIST specification. These reversals are inexpensive to implement as they unconditionally
+swap bit positions and therefore do not require any logic.
+====
+
+
+[NOTE]
+====
+The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0.
+This instruction is often used in GHASH code. In some cases it is followed
+by an XOR to perform a multiply-add. Implementations may choose to fuse these
+two instructions to improve performance on GHASH code that
+doesn't use the add-multiply form of the `vghsh.vv` instruction.
+
+Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0.
+====
+
+
+Operation::
+[source,pseudocode]
+--
+function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
+  // operands are input with bits reversed in each byte
+  if(LMUL*VLEN < EGW)  then {
+    handle_illegal();  // illegal instruction exception
+    RETIRE_FAIL
+  } else {
+
+  eg_len = (vl/EGS)
+  eg_start = (vstart/EGS)
+
+  foreach (i from eg_start to eg_len-1) {
+    let helem = if suffix == "vv" then i else 0;
+    let Y = brev8(get_velem(vd,EGW=128,i));  // Multiplier
+    let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
+    let Z : bits(128) = 0;
+
+    for (int bit = 0; bit < 128; bit++) {
+      if bit_to_bool(Y[bit])
+        Z ^= H
+
+      bool reduce = bit_to_bool(H[127]);
+      H = H << 1; // left shift H by 1
+      if (reduce)
+        H ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
+    }
+
+
+    let result = brev8(Z);
+    set_velem(vd, EGW=128, i, result);
+  }
+  RETIRE_SUCCESS
+ }
+}
+--
+
+Included in::
+<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>

From c10f7456b43199b55dd65113cd7a798e96459a05 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Sun, 27 Aug 2023 10:35:37 -0700
Subject: [PATCH 05/26] Revert "Completing vghsh.vs/vgmul.vs descriptions"

This reverts commit bc7f52746c27030133b47fafe3247a7821ac3a45.
---
 doc/vector/insns/vghsh.adoc | 21 +++++++--------
 doc/vector/insns/vgmul.adoc | 53 ++++++++++++-------------------------
 2 files changed, 26 insertions(+), 48 deletions(-)

diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc
index b487b11d..bb9c97a4 100644
--- a/doc/vector/insns/vghsh.adoc
+++ b/doc/vector/insns/vghsh.adoc
@@ -6,7 +6,7 @@ Vector Add-Multiply over GHASH Galois-Field
 
 Mnemonic::
 vghsh.vv vd, vs2, vs1 +
-vghsh.vs vd, rs2, vs1
+vghsh.vs vd, vs2, vs1
 
 Encoding (Vector-Vector)::
 [wavedrom, , svg]
@@ -40,7 +40,6 @@ Encoding (Vector-Scalar)::
 
 Reserved Encodings::
 * `SEW` is any value other than 32
-* `vghsh.vs` encoding (except if `Zvkgb` is enabled)
 
 Arguments::
 
@@ -63,15 +62,7 @@ Arguments::
 Description::
 A single "iteration" of the GHASH~H~ algorithm is performed.
 
-
-The previous partial hashes are read as 4-element groups from 'vd',
-the cipher texts are read as 4-element groups from `vs1`
- and the hash subkeys are read from either the corresponding 4-element group
-in `vs2` (vector-vector form) or the scalar element group in `vs2`
-(vector-scalar form, `Zvkgb` only). The resulting partial hashes are writen as 4-element groups into `vd`.
-
-
-This instruction treats all of the input and output element groups as 128-bit polynomials and
+This instruction treats all of the inputs and outputs as 128-bit polynomials and
 performs operations over GF[2].
 It produces the next partial hash (Y~i+1~) by adding the current partial
 hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^))
@@ -101,11 +92,17 @@ with the NIST specification. These reversals are inexpensive to implement as the
 swap bit positions and therefore do not require any logic.
 ====
 
+[NOTE]
+====
+Since the same hash subkey `H` will typically be used repeatedly on a given message,
+a future extension might define a vector-scalar version of this instruction where
+`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1.
+====
 
 Operation::
 [source,pseudocode]
 --
-function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
+function clause execute (VGHSH(vs2, vs1, vd)) = {
   // operands are input with bits reversed in each byte
   if(LMUL*VLEN < EGW)  then {
     handle_illegal();  // illegal instruction exception
diff --git a/doc/vector/insns/vgmul.adoc b/doc/vector/insns/vgmul.adoc
index ca858010..0008132c 100644
--- a/doc/vector/insns/vgmul.adoc
+++ b/doc/vector/insns/vgmul.adoc
@@ -7,7 +7,7 @@ Vector Multiply over GHASH Galois-Field
 Mnemonic::
 vgmul.vv vd, vs2
 
-Encoding (Vector-Vector)::
+Encoding::
 [wavedrom, , svg]
 ....
 {reg:[
@@ -20,25 +20,8 @@ Encoding (Vector-Vector)::
 {bits: 6, name: '101000'},
 ]}
 ....
-
-
-Encoding (Vector-Scalar)::
-[wavedrom, , svg]
-....
-{reg:[
-{bits: 7, name: 'OP-P'},
-{bits: 5, name: 'vd'},
-{bits: 3, name: 'OPMVV'},
-{bits: 5, name: '10001'},
-{bits: 5, name: 'vs2'},
-{bits: 1, name: '1'},
-{bits: 6, name: '101001'},
-]}
-....
-
 Reserved Encodings::
-* `SEW` is any value other than 32
-* `vgmul.vs` encoding (except if `Zvkgb` is enabled)
+* `SEW` is any value other than 32 
 
 Arguments::
 
@@ -57,14 +40,9 @@ Arguments::
 | Vd  | output | 128  | 4 | 32 | Product
 |===
 
-Description::
+Description:: 
 A GHASH~H~ multiply is performed.
 
-The multipliers are read as 4-element groups from 'vd',
- the multiplicands subkeys are read from either the corresponding 4-element group
-in `vs2` (vector-vector form) or the scalar element group in `vs2`
-(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`.
-
 This instruction treats all of the inputs and outputs as 128-bit polynomials and 
 performs operations over GF[2].
 It produces the product over GF(2^128^) of the two 128-bit inputs.
@@ -89,23 +67,27 @@ with the NIST specification. These reversals are inexpensive to implement as the
 swap bit positions and therefore do not require any logic.
 ====
 
+[NOTE]
+====
+Since the same multiplicand will typically be used repeatedly on a given message,
+a future extension might define a vector-scalar version of this instruction where
+`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. 
+====
 
 [NOTE]
 ====
-The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0.
+This instruction is identical to `vghsh.vv` with vs1=0.
 This instruction is often used in GHASH code. In some cases it is followed
 by an XOR to perform a multiply-add. Implementations may choose to fuse these
-two instructions to improve performance on GHASH code that
-doesn't use the add-multiply form of the `vghsh.vv` instruction.
-
-Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0.
+two instructions to improve performance on GHASH code that 
+doesn't use the add-multiply form of the `vghsh.vv` instruction. 
 ====
 
 
 Operation::
 [source,pseudocode]
 --
-function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
+function clause execute (VGMUL(vs2, vs1, vd)) = {
   // operands are input with bits reversed in each byte
   if(LMUL*VLEN < EGW)  then {
     handle_illegal();  // illegal instruction exception
@@ -114,11 +96,10 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
-
+  
   foreach (i from eg_start to eg_len-1) {
-    let helem = if suffix == "vv" then i else 0;
     let Y = brev8(get_velem(vd,EGW=128,i));  // Multiplier
-    let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
+    let H = brev8(get_velem(vs2,EGW=128,i)); // Multiplicand
     let Z : bits(128) = 0;
 
     for (int bit = 0; bit < 128; bit++) {
@@ -132,7 +113,7 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
     }
 
 
-    let result = brev8(Z);
+    let result = brev8(Z); 
     set_velem(vd, EGW=128, i, result);
   }
   RETIRE_SUCCESS
@@ -141,4 +122,4 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>
+<<zvkg>>, <<zvkng>>, <<zvksg>>

From 4e70f70c60fa247529e282f2a59d505ddd9dd7a7 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Sun, 27 Aug 2023 10:35:40 -0700
Subject: [PATCH 06/26] Revert "[Zv fast track] prototyping vg* changes"

This reverts commit a1bfcfce97ab683584bb9da4dc672be511c4a5cc.
---
 doc/vector/insns/vghsh.adoc | 41 ++++++++++---------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/doc/vector/insns/vghsh.adoc b/doc/vector/insns/vghsh.adoc
index bb9c97a4..cd02b0e6 100644
--- a/doc/vector/insns/vghsh.adoc
+++ b/doc/vector/insns/vghsh.adoc
@@ -1,14 +1,13 @@
 [[insns-vghsh, Vector GHASH Add-Multiply]]
-= vghsh.[vv,vs]
+= vghsh.vv
 
 Synopsis::
 Vector Add-Multiply over GHASH Galois-Field
 
 Mnemonic::
-vghsh.vv vd, vs2, vs1 +
-vghsh.vs vd, vs2, vs1
+vghsh.vv vd, vs2, vs1
 
-Encoding (Vector-Vector)::
+Encoding::
 [wavedrom, , svg]
 ....
 {reg:[
@@ -21,25 +20,8 @@ Encoding (Vector-Vector)::
 {bits: 6, name: '101100'},
 ]}
 ....
-
-// This might be the first instruction with 3 operands and .vs
-// need to find an encoding
-Encoding (Vector-Scalar)::
-[wavedrom, , svg]
-....
-{reg:[
-{bits: 7, name: 'OP-P'},
-{bits: 5, name: 'vd'},
-{bits: 3, name: 'OPMVV'},
-{bits: 5, name: 'vs1'},
-{bits: 5, name: 'vs2'},
-{bits: 1, name: '1'},
-{bits: 6, name: '101100'},
-]}
-....
-
 Reserved Encodings::
-* `SEW` is any value other than 32
+* `SEW` is any value other than 32 
 
 Arguments::
 
@@ -59,10 +41,10 @@ Arguments::
 | Vd  | output | 128  | 4 | 32 | Partial-hash (Y~i+1~)
 |===
 
-Description::
+Description:: 
 A single "iteration" of the GHASH~H~ algorithm is performed.
 
-This instruction treats all of the inputs and outputs as 128-bit polynomials and
+This instruction treats all of the inputs and outputs as 128-bit polynomials and 
 performs operations over GF[2].
 It produces the next partial hash (Y~i+1~) by adding the current partial
 hash (Y~i~) to the cipher text block (X~i~) and then multiplying (over GF(2^128^))
@@ -78,7 +60,7 @@ Y~i+1~ = ((Y~i~ ^ X~i~) &#183; H)
 The NIST specification (see <<zvkg>>) orders the coefficients from left to right x~0~x~1~x~2~...x~127~
 for a polynomial x~0~ + x~1~u +x~2~ u^2^ + ... + x~127~u^127^. This can be viewed as a collection of
 byte elements in memory with the byte containing the lowest coefficients (i.e., 0,1,2,3,4,5,6,7)
-residing at the lowest memory address. Since the bits in the bytes are reversed,
+residing at the lowest memory address. Since the bits in the bytes are reversed, 
 This instruction internally performs bit swaps within bytes to put the bits in the standard ordering
 (e.g., 7,6,5,4,3,2,1,0).
 
@@ -96,7 +78,7 @@ swap bit positions and therefore do not require any logic.
 ====
 Since the same hash subkey `H` will typically be used repeatedly on a given message,
 a future extension might define a vector-scalar version of this instruction where
-`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1.
+`vs2` is the scalar element group. This would help reduce register pressure when `LMUL` > 1. 
 ====
 
 Operation::
@@ -111,12 +93,11 @@ function clause execute (VGHSH(vs2, vs1, vd)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
-
+  
   foreach (i from eg_start to eg_len-1) {
-    let helem = if suffix == "vv" then i else 0;
     let Y = (get_velem(vd,EGW=128,i));  // current partial-hash
     let X = get_velem(vs1,EGW=128,i);  // block cipher output
-    let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
+    let H = brev8(get_velem(vs2,EGW=128,i)); // Hash subkey
 
     let Z : bits(128) = 0;
 
@@ -141,4 +122,4 @@ function clause execute (VGHSH(vs2, vs1, vd)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>
+<<zvkg>>, <<zvkng>>, <<zvksg>>

From b0af2774d52cf796fbf4620be808c7442c963784 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Sun, 27 Aug 2023 10:35:42 -0700
Subject: [PATCH 07/26] Revert "[Zv fast track] prototyping vclmul* changes"

This reverts commit 4a59d4ba083a1ae4a5d24c6dff726f2508d9a245.
---
 doc/vector/insns/vclmul.adoc  | 21 ++++++++++++---------
 doc/vector/insns/vclmulh.adoc | 19 +++++++++----------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/doc/vector/insns/vclmul.adoc b/doc/vector/insns/vclmul.adoc
index 499d7562..ad941888 100644
--- a/doc/vector/insns/vclmul.adoc
+++ b/doc/vector/insns/vclmul.adoc
@@ -36,8 +36,7 @@ Encoding (Vector-Scalar)::
 ]}
 ....
 Reserved Encodings::
-* `SEW` is any value other than 64 (`Zvbc`)
-* `SEW` is any value other than 32 or 64 (`Zvbcb`)
+* `SEW` is any value other than 64
 
 Arguments::
 
@@ -56,16 +55,20 @@ Arguments::
 Description::
 Produces the low half of 128-bit carry-less product.
 
-Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
-either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
+Each 64-bit element in the `vs2` vector register is carry-less multiplied by 
+either each 64-bit element in `vs1` (vector-vector), or the 64-bit value
 from integer register `rs1` (vector-scalar). The result is the least
-significant SEW bits of the carry-less product.
+significant 64 bits of the carry-less product.
 
 [NOTE]
 ====
 The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
 We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the
 instructions in the `zvkg` extension and can have utility in other areas.
+Likewise, we treat other SEW values as reserved so as not to preclude
+future extensions from using this opcode with different element widths.
+For example, a future extension might define an `SEW`=32 version of this instruction to enable `Zve32*` implementations to have
+vector carryless multiplication instructions.
 ====
 
 Operation::
@@ -76,10 +79,10 @@ Operation::
 function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = {
 
   foreach (i from vstart to vl-1) {
-    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i)
+    let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i)
                           else zext_or_truncate_to_sew(X(vs1));
-    let op2 : bits (SEW) = get_velem(vs2, i);
-    let product : bits (SEW) = clmul(op1, op2, SEW);
+    let op2 : bits (64) = get_velem(vs2,i);
+    let product : bits (64) = clmul(op1,op2,SEW);
     set_velem(vd, i, product);
   }
   RETIRE_SUCCESS
@@ -95,4 +98,4 @@ function clmul(x, y, width) = {
 --
 
 Included in::
-<<zvbc>>, <<zvknc>>, <<zvksc>>, <<zvbcb>>
+<<zvbc>>, <<zvknc>>, <<zvksc>>
diff --git a/doc/vector/insns/vclmulh.adoc b/doc/vector/insns/vclmulh.adoc
index b5c0acb7..44f125ce 100644
--- a/doc/vector/insns/vclmulh.adoc
+++ b/doc/vector/insns/vclmulh.adoc
@@ -36,8 +36,7 @@ Encoding (Vector-Scalar)::
 ]}
 ....
 Reserved Encodings::
-* `SEW` is any value other than 64 (`Zvbcb`)
-* `SEW` is any value other than 32 or 64 (`Zvbcb`)
+* `SEW` is any value other than 64
 
 Arguments::
 
@@ -53,13 +52,13 @@ Arguments::
 | Vd  | output | carry-less product high
 |===
 
-Description::
+Description:: 
 Produces the high half of 128-bit carry-less product.
 
-Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
-either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
+Each 64-bit element in the `vs2` vector register is carry-less multiplied by 
+either each 64-bit element in `vs1` (vector-vector), or the 64-bit value
 from integer register `rs1` (vector-scalar). The result is the most
-significant SEW bits of the carry-less product.
+significant 64 bits of the carry-less product.
 
 // This instruction must always be implemented such that its execution latency does not depend
 // on the data being operated upon.
@@ -70,10 +69,10 @@ Operation::
 function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = {
 
   foreach (i from vstart to vl-1) {
-    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i)
+    let op1 : bits (64) = if suffix =="vv" then get_velem(vs1,i)
                           else zext_or_truncate_to_sew(X(vs1));
-    let op2 : bits (SEW) = get_velem(vs2, i);
-    let product : bits (SEW) = clmulh(op1, op2, SEW);
+    let op2 : bits (64) = get_velem(vs2, i);
+    let product : bits (64) = clmulh(op1, op2, SEW);
     set_velem(vd, i, product);
   }
   RETIRE_SUCCESS
@@ -90,4 +89,4 @@ function clmulh(x, y, width) = {
 --
 
 Included in::
-<<zvbc>>, <<zvbcb>>, <<zvknc>>, <<zvksc>>
+<<zvbc>>, <<zvknc>>, <<zvksc>>

From 72084cdd6edd904242c77ca8383b3ee6fba593d5 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Sun, 27 Aug 2023 10:44:05 -0700
Subject: [PATCH 08/26] refactoring Zvkgs and vghsh.vs specifications

---
 .../riscv-crypto-vector-zvkgs.adoc            | 27 ++++--------
 doc/vector-extra/vghsh-vs.adoc                | 43 +++++++------------
 2 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
index 254e2ade..40787c63 100644
--- a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
@@ -1,23 +1,14 @@
-[[zvkg,Zvkg]]
-=== `Zvkg` - Vector GCM/GMAC
+[[zvkgs,Zvkgs]]
+=== `Zvkgs` - Vector-Scalar GCM/GMAC
 
-Instructions to enable the efficient implementation of GHASH~H~ which is used in Galois/Counter Mode (GCM) and
-Galois Message Authentication Code (GMAC).
+`Zvkgs` depends on `Zvkg`; it extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`.
 
-All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
+These instructions enable the efficient implementation of parallel versions of GHASH~H~, which is used in Galois/Counter Mode (GCM) and
+Galois Message Authentication Code (GMAC).
 
-GHASH~H~ is defined in the
-// link:https://csrc.nist.gov/publications/detail/sp/800-38d/final[NIST Special Publication 800-38D]
- "Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC"
- cite:[nist:gcm]
-(NIST Specification).
+The instructions inherit the same constraints as their `Zvkg` vector-vector counterparts (element group size, data-independent execution timing, and the requirement that `vl` and `vstart` be multiples of `EGS`).
 
-[NOTE]
-====
-GCM is used in conjunction with block ciphers (e.g., AES and SM4) to encrypt a message and
-provide authentication.
-GMAC is used to provide authentication of a message without encryption.
-====
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
 
 To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing.
 
@@ -34,8 +25,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`.
 |EGW
 |Mnemonic
 |Instruction
-| 32 | 128 | vghsh.vv | <<insns-vghsh>>
-| 32 | 128 | vgmul.vv | <<insns-vgmul>>
+| 32 | 128 | vghsh.vs | <<insns-vghsh-vs>>
+| 32 | 128 | vgmul.vs | <<insns-vgmul-vs>>
 
 |===
 
diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc
index b487b11d..1d531381 100644
--- a/doc/vector-extra/vghsh-vs.adoc
+++ b/doc/vector-extra/vghsh-vs.adoc
@@ -1,26 +1,12 @@
-[[insns-vghsh, Vector GHASH Add-Multiply]]
-= vghsh.[vv,vs]
+[[insns-vghsh-vs, Vector-Scalar GHASH Add-Multiply]]
+= vghsh.vs
 
 Synopsis::
-Vector Add-Multiply over GHASH Galois-Field
+Vector-Scalar Add-Multiply over GHASH Galois-Field
 
 Mnemonic::
-vghsh.vv vd, vs2, vs1 +
-vghsh.vs vd, rs2, vs1
+vghsh.vs vd, vs2, vs1
 
-Encoding (Vector-Vector)::
-[wavedrom, , svg]
-....
-{reg:[
-{bits: 7, name: 'OP-P'},
-{bits: 5, name: 'vd'},
-{bits: 3, name: 'OPMVV'},
-{bits: 5, name: 'vs1'},
-{bits: 5, name: 'vs2'},
-{bits: 1, name: '1'},
-{bits: 6, name: '101100'},
-]}
-....
 
 // This might be the first instruction with 3 operands and .vs
 // need to find an encoding
@@ -40,7 +26,6 @@ Encoding (Vector-Scalar)::
 
 Reserved Encodings::
 * `SEW` is any value other than 32
-* `vghsh.vs` encoding (except if `Zvkgb` is enabled)
 
 Arguments::
 
@@ -66,10 +51,12 @@ A single "iteration" of the GHASH~H~ algorithm is performed.
 
 The previous partial hashes are read as 4-element groups from 'vd',
 the cipher texts are read as 4-element groups from `vs1`
- and the hash subkeys are read from either the corresponding 4-element group
-in `vs2` (vector-vector form) or the scalar element group in `vs2`
-(vector-scalar form, `Zvkgb` only). The resulting partial hashes are writen as 4-element groups into `vd`.
+ and the hash subkeys are read from the scalar element group in `vs2`
+The resulting partial hashes are writen as 4-element groups into `vd`.
+
 
+// The following is copied from vghsh.vv and could be omitted
+// (replaced with a link to the original specification)
 
 This instruction treats all of the input and output element groups as 128-bit polynomials and
 performs operations over GF[2].
@@ -105,7 +92,7 @@ swap bit positions and therefore do not require any logic.
 Operation::
 [source,pseudocode]
 --
-function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
+function clause execute (VGHSHVS(vs2, vs1, vd)) = {
   // operands are input with bits reversed in each byte
   if(LMUL*VLEN < EGW)  then {
     handle_illegal();  // illegal instruction exception
@@ -115,11 +102,13 @@ function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
 
+  // H is common to all element groups
+  let helem = 0;
+  let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
+
   foreach (i from eg_start to eg_len-1) {
-    let helem = if suffix == "vv" then i else 0;
-    let Y = (get_velem(vd,EGW=128,i));  // current partial-hash
+    let Y = get_velem(vd,EGW=128,i);  // current partial-hash
     let X = get_velem(vs1,EGW=128,i);  // block cipher output
-    let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
 
     let Z : bits(128) = 0;
 
@@ -144,4 +133,4 @@ function clause execute (VGHSH(vs2, vs1, vd, suffix)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>
+<<zvkgs>>

From 8c5a9f255c3640ab40c537a3e9ced02a5921d02b Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:15:11 -0700
Subject: [PATCH 09/26] fixing vghsh.vs/vgmul.vs descriptions

---
 doc/vector-extra/vghsh-vs.adoc |  2 +-
 doc/vector-extra/vgmul-vs.adoc | 41 +++++++++++-----------------------
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/vghsh-vs.adoc
index 1d531381..e1bf1c7d 100644
--- a/doc/vector-extra/vghsh-vs.adoc
+++ b/doc/vector-extra/vghsh-vs.adoc
@@ -51,7 +51,7 @@ A single "iteration" of the GHASH~H~ algorithm is performed.
 
 The previous partial hashes are read as 4-element groups from 'vd',
 the cipher texts are read as 4-element groups from `vs1`
- and the hash subkeys are read from the scalar element group in `vs2`
+ and the hash subkeys are read from the scalar element group in `vs2`.
 The resulting partial hashes are writen as 4-element groups into `vd`.
 
 
diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/vgmul-vs.adoc
index ca858010..1192f334 100644
--- a/doc/vector-extra/vgmul-vs.adoc
+++ b/doc/vector-extra/vgmul-vs.adoc
@@ -1,25 +1,11 @@
-[[insns-vgmul, Vector GHASH Multiply]]
-= vgmul.vv
+[[insns-vgmul-vs, Vector-Scalar GHASH Multiply]]
+= vgmul.vs
 
 Synopsis::
-Vector Multiply over GHASH Galois-Field
+Vector-Scalar Multiply over GHASH Galois-Field
 
 Mnemonic::
-vgmul.vv vd, vs2
-
-Encoding (Vector-Vector)::
-[wavedrom, , svg]
-....
-{reg:[
-{bits: 7, name: 'OP-P'},
-{bits: 5, name: 'vd'},
-{bits: 3, name: 'OPMVV'},
-{bits: 5, name: '10001'},
-{bits: 5, name: 'vs2'},
-{bits: 1, name: '1'},
-{bits: 6, name: '101000'},
-]}
-....
+vgmul.vs vd, vs2
 
 
 Encoding (Vector-Scalar)::
@@ -38,7 +24,6 @@ Encoding (Vector-Scalar)::
 
 Reserved Encodings::
 * `SEW` is any value other than 32
-* `vgmul.vs` encoding (except if `Zvkgb` is enabled)
 
 Arguments::
 
@@ -61,11 +46,10 @@ Description::
 A GHASH~H~ multiply is performed.
 
 The multipliers are read as 4-element groups from 'vd',
- the multiplicands subkeys are read from either the corresponding 4-element group
-in `vs2` (vector-vector form) or the scalar element group in `vs2`
-(vector-scalar form, `Zvkgb` only). The resulting products are written as 4-element groups into `vd`.
+and the multiplicand (hash subkey) is read from the scalar element group in `vs2`.
+The resulting products are written as 4-element groups into `vd`.
 
-This instruction treats all of the inputs and outputs as 128-bit polynomials and 
+This instruction treats all of the inputs and outputs as 128-bit polynomials and
 performs operations over GF[2].
 It produces the product over GF(2^128^) of the two 128-bit inputs.
 
@@ -92,13 +76,13 @@ swap bit positions and therefore do not require any logic.
 
 [NOTE]
 ====
-The instruction `vgmul.vv` is identical to `vghsh.vv` with vs1=0.
+Just as the instruction `vgmul.vv` is identical to `vghsh.vv` when the value of the `vs1` register is 0,
+the instruction `vgmul.vs` is identical to `vghsh.vs` when the value of the `vs1` register is 0.
 This instruction is often used in GHASH code. In some cases it is followed
 by an XOR to perform a multiply-add. Implementations may choose to fuse these
 two instructions to improve performance on GHASH code that
 doesn't use the add-multiply form of the `vghsh.vv` instruction.
 
-Similarly, the instruction `vgmul.vs` is identical to `vghsh.vs` with vs1=0.
 ====
 
 
@@ -114,11 +98,12 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
+  // H multiplicand is constant for all loop iterations
+  let helem = 0;
+  let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
 
   foreach (i from eg_start to eg_len-1) {
-    let helem = if suffix == "vv" then i else 0;
     let Y = brev8(get_velem(vd,EGW=128,i));  // Multiplier
-    let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
     let Z : bits(128) = 0;
 
     for (int bit = 0; bit < 128; bit++) {
@@ -141,4 +126,4 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
 --
 
 Included in::
-<<zvkg>>, <<zvkgb>>, <<zvkng>>, <<zvksg>>
+<<zvkgs>>

From 056dd04083c6442f8b865fabd1d56f1c84343484 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:15:39 -0700
Subject: [PATCH 10/26] adding vclmul/vclmulh instruction specification for
 Zve32e

---
 doc/vector-extra/insns/vclmul.adoc  | 104 ++++++++++++++++++++++++++++
 doc/vector-extra/insns/vclmulh.adoc |  98 ++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100644 doc/vector-extra/insns/vclmul.adoc
 create mode 100644 doc/vector-extra/insns/vclmulh.adoc

diff --git a/doc/vector-extra/insns/vclmul.adoc b/doc/vector-extra/insns/vclmul.adoc
new file mode 100644
index 00000000..e1874bf2
--- /dev/null
+++ b/doc/vector-extra/insns/vclmul.adoc
@@ -0,0 +1,104 @@
+[[insns-vclmul-32e, Vector Carry-less Multiply]]
+= vclmul.[vv,vx]
+
+Synopsis::
+Vector Carry-less Multiply by vector or scalar - returning low half of product.
+
+Mnemonic::
+vclmul.vv vd, vs2, vs1, vm +
+vclmul.vx vd, vs2, rs1, vm
+
+Encoding (Vector-Vector)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-V'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: 'vs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: 'vm'},
+{bits: 6, name: '001100'},
+]}
+....
+
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-V'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVX'},
+{bits: 5, name: 'rs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: 'vm'},
+{bits: 6, name: '001100'},
+]}
+....
+Reserved Encodings::
+* `SEW` is any value other than 32 (`Zvbc32e` only)
+* `SEW` is any value other than 64 (`Zvbc` only)
+* `SEW` is any value other than 32 or 64 (`Zvbc` and `Zvbc32e`)
+
+Arguments::
+
+[%autowidth]
+[%header,cols="4,2,2"]
+|===
+|Register
+|Direction
+|Definition
+
+| Vs1/Rs1 | input  |  multiplier
+| Vs2 | input  |  multiplicand
+| Vd  | output | carry-less product low
+|===
+
+[NOTE]
+====
+The `vclmul` instruction was initially defined in `Zvbc` with support for `SEW=64` only; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32`.
+====
+
+Description::
+Produces the low half of the `2*SEW`-bit carry-less product.
+
+Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
+either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
+from integer register `rs1` (vector-scalar). The result is the least
+significant SEW bits of the carry-less product.
+
+[NOTE]
+====
+The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
+We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the
+instructions in the `zvkg` extension and can have utility in other areas.
+====
+
+Operation::
+[source,sail]
+--
+
+
+function clause execute (VCLMUL(vs2, vs1, vd, suffix)) = {
+
+  foreach (i from vstart to vl-1) {
+    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1, i)
+                          else zext_or_truncate_to_sew(X(vs1));
+    let op2 : bits (SEW) = get_velem(vs2, i);
+    let product : bits (SEW) = clmul(op1, op2, SEW);
+    set_velem(vd, i, product);
+  }
+  RETIRE_SUCCESS
+}
+
+function clmul(x, y, width) = {
+  let result : bits(width) = zeros();
+  foreach (i from 0 to (width - 1)) {
+    if y[i] == 1 then result = result ^ (x << i);
+  }
+  result
+}
+--
+
+Included in::
+<<zvbc32e>>
diff --git a/doc/vector-extra/insns/vclmulh.adoc b/doc/vector-extra/insns/vclmulh.adoc
new file mode 100644
index 00000000..6f536542
--- /dev/null
+++ b/doc/vector-extra/insns/vclmulh.adoc
@@ -0,0 +1,98 @@
+[[insns-vclmulh, Vector Carry-less Multiply Return High Half]]
+= vclmulh.[vv,vx]
+
+Synopsis::
+Vector Carry-less Multiply by vector or scalar - returning high half of product.
+
+Mnemonic::
+vclmulh.vv vd, vs2, vs1, vm +
+vclmulh.vx vd, vs2, rs1, vm
+
+Encoding (Vector-Vector)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-V'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVV'},
+{bits: 5, name: 'vs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: 'vm'},
+{bits: 6, name: '001101'},
+]}
+....
+
+Encoding (Vector-Scalar)::
+[wavedrom, , svg]
+....
+{reg:[
+{bits: 7, name: 'OP-V'},
+{bits: 5, name: 'vd'},
+{bits: 3, name: 'OPMVX'},
+{bits: 5, name: 'rs1'},
+{bits: 5, name: 'vs2'},
+{bits: 1, name: 'vm'},
+{bits: 6, name: '001101'},
+]}
+....
+Reserved Encodings::
+* `SEW` is any value other than 64 (`Zvbcb`)
+* `SEW` is any value other than 32 or 64 (`Zvbcb`)
+
+Arguments::
+
+[%autowidth]
+[%header,cols="4,2,2"]
+|===
+|Register
+|Direction
+|Definition
+
+| Vs1/Rs1 | input  | multiplier
+| Vs2 | input  | multiplicand
+| Vd  | output | carry-less product high
+|===
+
+[NOTE]
+====
+The `vclmulh` instruction was initially defined in `Zvbc` with support for `SEW=64` only; this page describes how the specification is extended by `Zvbc32e` to also support `SEW=32`.
+====
+
+Description::
+Produces the high half of the `2*SEW`-bit carry-less product.
+
+Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
+either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
+from integer register `rs1` (vector-scalar). The result is the most
+significant SEW bits of the carry-less product.
+
+// This instruction must always be implemented such that its execution latency does not depend
+// on the data being operated upon.
+
+Operation::
+[source,sail]
+--
+function clause execute (VCLMULH(vs2, vs1, vd, suffix)) = {
+
+  foreach (i from vstart to vl-1) {
+    let op1 : bits (SEW) = if suffix =="vv" then get_velem(vs1,i)
+                          else zext_or_truncate_to_sew(X(vs1));
+    let op2 : bits (SEW) = get_velem(vs2, i);
+    let product : bits (SEW) = clmulh(op1, op2, SEW);
+    set_velem(vd, i, product);
+  }
+  RETIRE_SUCCESS
+}
+
+function clmulh(x, y, width) = {
+  let result : bits(width) = 0;
+  foreach (i from 1 to (width - 1)) {
+    if y[i] == 1 then result = result ^ (x >> (width - i));
+  }
+  result
+}
+
+--
+
+Included in::
+<<zvbc>>, <<zvbcb>>, <<zvknc>>, <<zvksc>>

From 11bd8af6b317c8b2080b074702765db5e9089c7f Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:16:37 -0700
Subject: [PATCH 11/26] moving vghsh.vs/vgmul.vs spec from doc/vector-extra to
 doc/vector-extra/insns/

---
 doc/vector-extra/{ => insns}/vghsh-vs.adoc | 0
 doc/vector-extra/{ => insns}/vgmul-vs.adoc | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename doc/vector-extra/{ => insns}/vghsh-vs.adoc (100%)
 rename doc/vector-extra/{ => insns}/vgmul-vs.adoc (100%)

diff --git a/doc/vector-extra/vghsh-vs.adoc b/doc/vector-extra/insns/vghsh-vs.adoc
similarity index 100%
rename from doc/vector-extra/vghsh-vs.adoc
rename to doc/vector-extra/insns/vghsh-vs.adoc
diff --git a/doc/vector-extra/vgmul-vs.adoc b/doc/vector-extra/insns/vgmul-vs.adoc
similarity index 100%
rename from doc/vector-extra/vgmul-vs.adoc
rename to doc/vector-extra/insns/vgmul-vs.adoc

From 5e836daad9214be01dae84ba7e7492f5a9b26558 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:16:54 -0700
Subject: [PATCH 12/26] adding instruction table

---
 .../riscv-crypto-vector-extra-inst-table.adoc | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc

diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
new file mode 100644
index 00000000..01c1bd23
--- /dev/null
+++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
@@ -0,0 +1,60 @@
+[appendix]
+[[crypto_vector_instructions]]
+=== Crypto Vector Cryptographic Instructions
+
+OP-P (0x77)
+Crypto Vector instructions, including Zvkgs, except Zvbb and Zvbc
+The new/modified encodings are in bold and underlined.
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+[cols="4,1,1,1,1,4,1,1,1,4,1,1,1"]
+|===
+5+^|Integer 4+^|Integer 4+^| FP
+
+| funct3 | | | |            | funct3 | | |             | funct3 | | |
+| OPIVV  |V| | |            | OPMVV  |V| |             | OPFVV  |V| |
+| OPIVX  | |X| |            | OPMVX  | |X|             | OPFVF  | |F|
+| OPIVI  | | |I|            |        | | |             |        | | |
+|===
+
+// [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
+[cols="6,1,1,1,1,6,1,1,6,6,1,1,1"]
+
+// TODO to be updated with vghsh.vs and vgmul.vs encoding
+|===
+5+^| funct6                  4+^| funct6                 4+^| funct6
+
+|100000  | | | |            | 100000 |V| | vsm3me       | 100000 | | |
+| 100001 | | | |            | 100001 |V| | vsm4k.vi     | 100001 | | |
+| 100010 | | | |            | 100010 |V| | vaesfk1.vi   | 100010 | | |
+| 100011 | | | |            | 100011 | | | __**vghsh.vs**__ | 100011 | | |
+| 100100 | | | |            | 100100 | | |              | 100100 | | |
+| 100101 | | | |            | 100101 | | |              | 100101 | | |
+| 100110 | | | |            | 100110 | | |              | 100110 | | |
+| 100111 | | | |            | 100111 | | |              | 100111 | | |
+|        | | | |            |        | | |              |        | | |
+| 101000 | | | |            | 101000 |V| | VAES.vv    | 101000 | | |
+| 101001 | | | |            | 101001 |V| | *VAES.vs*    | 101001 | | |
+| 101010 | | | |            | 101010 |V| | vaesfk2.vi   | 101010 | | |
+| 101011 | | | |            | 101011 |V| | vsm3c.vi     | 101011 | | |
+| 101100 | | | |            | 101100 |V| | vghsh        | 101100 | | |
+| 101101 | | | |            | 101101 |V| | vsha2ms      | 101101 | | |
+| 101110 | | | |            | 101110 |V| | vsha2ch      | 101110 | | |
+| 101111 | | | |            | 101111 |V| | vsha2cl      | 101111 | | |
+|===
+
+<<<
+
+.VAES.vv and VAES.vs encoding space
+[cols="2,14"]
+|===
+|vs1|
+
+| 00000 | vaesdm
+| 00001 | vaesdf
+| 00010 | vaesem
+| 00011 | vaesef
+| 00111 | vaesz
+| 10000 | vsm4r
+| 10001 | __**vgmul**__
+|===

From c986a6ffaeddbf5b5c411ea25a50fdba263b6027 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:24:07 -0700
Subject: [PATCH 13/26] main document for vector extra

---
 .../riscv-crypto-spec-vector-extra.adoc       | 135 +++---------------
 1 file changed, 21 insertions(+), 114 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 768d7999..f3adb3df 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -46,14 +46,12 @@ endif::[]
 [colophon]
 = Colophon
 
-This document describes the Vector Cryptography extensions to the 
+This document describes the Vector Cryptography Extra extensions to the
 RISC-V Instruction Set Architecture.
 
-This document is _frozen_.
-Change is extremely unlikely. A high threshold will be used, and a
-change will only occur because of some truly critical issue being
-identified during the public review cycle. Any other desired or needed
-changes can be the subject of a follow-on new extension.
+This document is a _Discussion Document_.
+Assume everything can change.
+This document is not complete yet and was created only for the purpose of conversation outside of the document.
 For more information, see link:http://riscv.org/spec-state[here].
 
 [NOTE]
@@ -66,7 +64,7 @@ link:http://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.
 ====
 include::git-commit.adoc[]
 
-See link:https://github.com/riscv/riscv-crypto[github.com/riscv/riscv-crypto]
+See link:https://github.com/riscv/riscv-crypto/tree/main/doc/vector-extra[github.com/riscv/riscv-crypto/tree/main/doc/vector-extra]
 for more information.
 ====
 
@@ -75,14 +73,9 @@ for more information.
 
 Contributors to this specification (in alphabetical order)
 include: +
-Alan Baum,
-Barna Ibrahim,
-Barry Spinney,
-Ben Marshall,
-Derek Atkins,
-link:mailto:kdockser@tenstorrent.com[Ken Dockser] (Editor),
+Ken Dockser,
 Markku-Juhani O. Saarinen,
-Nicolas Brunie, 
+Nicolas Brunie,
 Richard Newell
 
 We are all very grateful to the many other people who have
@@ -91,34 +84,22 @@ feedback and questions.
 
 // ------------------------------------------------------------
 
-include::riscv-crypto-vector-introduction.adoc[]
-include::riscv-crypto-vector-audience.adoc[]
-include::riscv-crypto-vector-sail-specifications.adoc[]
-include::riscv-crypto-vector-policies.adoc[]
+include::riscv-crypto-vector-extra-introduction.adoc[]
 
 // ------------------------------------------------------------
 
-include::./riscv-crypto-vector-element-groups.adoc[]
-include::./riscv-crypto-vector-instruction-constraints.adoc[]
-include::./riscv-crypto-vector-scalar-instructions.adoc[]
-include::./riscv-crypto-vector-software-portability.adoc[]
 <<<
-    
 // ------------------------------------------------------------
 
 
 [[crypto_vector_extensions]]
 == Extensions Overview
 
-The section introduces all of the  extensions in the Vector Cryptography
+The section introduces all of the extensions in the Vector Cryptography Extra
 Instruction Set Extension Specification.
 
-The <<zvknh,Zvknhb>> and <<zvbc>> Vector Crypto Extensions
---and accordingly the composite extensions <<Zvkn>> and <<Zvks>>--
-require a Zve64x base,
-or application ("V") base Vector Extension.
 
-All of the other Vector Crypto Extensions can be built
+All the Vector Crypto Extra Extensions can be built
 on _any_ embedded (Zve*) or application ("V") base Vector Extension.
 
 // See <<crypto-vector-element-groups>> for more details on vector element groups and the drawbacks of
@@ -126,14 +107,14 @@ on _any_ embedded (Zve*) or application ("V") base Vector Extension.
 
 
 All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those
-in <<zvkned>>, <<zvknh,Zvknh[ab]>>, <<Zvkg>>, <<Zvksed>> and <<zvksh>> but _not_ <<zvbb>>,<<zvkb>>, or <<zvbc>>) shall
+in <<Zvkgs>>, but _not_ <<zvbc32e>>) shall
 be executed with data-independent execution latency as defined in the
 link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification].
 It is important to note that the Vector Crypto instructions are independent of the
 implementation of the `Zkt` extension and do not require that `Zkt` is implemented.
 
-This specification includes a <<Zvkt>> extension that, when implemented, requires certain vector instructions
-(including <<zvbb>>, <<zvkb>>, and <<zvbc>>) to be executed with data-independent execution latency.
+//This specification includes a <<Zvkt>> extension that, when implemented, requires certain vector instructions
+//(including <<zvbb>>, <<zvkb>>, and <<zvbc>>) to be executed with data-independent execution latency.
 
 Detection of individual cryptography extensions uses the
 unified software-based RISC-V discovery method.
@@ -144,98 +125,26 @@ At the time of writing, these discovery mechanisms are still a work in
 progress.
 ====
 
-include::./riscv-crypto-vector-zvbb.adoc[]
+include::./riscv-crypto-vector-extra-zvbc32e.adoc[]
 <<<
-include::./riscv-crypto-vector-zvbc.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkb.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkg.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkned.adoc[]
-<<<
-include::./riscv-crypto-vector-zvknh.adoc[]
-<<< 
-include::./riscv-crypto-vector-zvksed.adoc[]
-<<<
-include::./riscv-crypto-vector-zvksh.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkn.adoc[]
-<<<
-include::./riscv-crypto-vector-zvknc.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkng.adoc[]
-<<<
-include::./riscv-crypto-vector-zvks.adoc[]
-<<<
-include::./riscv-crypto-vector-zvksc.adoc[]
-<<<
-include::./riscv-crypto-vector-zvksg.adoc[]
-<<<
-include::./riscv-crypto-vector-zvkt.adoc[]
+include::./riscv-crypto-vector-zvkgs.adoc[]
 <<<
 
 
 
 // ------------------------------------------------------------
 
-[[crypto_vector_insns, reftext="Vector Cryptography Instructions"]]
+[[crypto_vector_extra_insns, reftext="Vector Cryptography Extra Instructions"]]
 == Instructions
 
 
-include::insns/vaesdf.adoc[leveloffset=+2]
-<<<
-include::insns/vaesdm.adoc[leveloffset=+2]
-<<<   
-include::insns/vaesef.adoc[leveloffset=+2]
-<<<
-include::insns/vaesem.adoc[leveloffset=+2]
-<<<
-include::insns/vaeskf1.adoc[leveloffset=+2]
-<<<
-include::insns/vaeskf2.adoc[leveloffset=+2]
-<<<
-include::insns/vaesz.adoc[leveloffset=+2]
-<<<
-include::insns/vandn.adoc[leveloffset=+2]
-<<<
-include::insns/vbrev.adoc[leveloffset=+2]
-<<<
-include::insns/vbrev8.adoc[leveloffset=+2]
-<<<
-include::insns/vclmul.adoc[leveloffset=+2]
-<<<
-include::insns/vclmulh.adoc[leveloffset=+2]
-<<<
-include::insns/vclz.adoc[leveloffset=+2]
-<<<
-include::insns/vcpop.adoc[leveloffset=+2]
-<<<
-include::insns/vctz.adoc[leveloffset=+2]
-<<<
-include::insns/vghsh.adoc[leveloffset=+2]
-<<<
-include::insns/vgmul.adoc[leveloffset=+2]
-<<<
-include::insns/vrev8.adoc[leveloffset=+2]
-<<<
-include::insns/vrol.adoc[leveloffset=+2]
-<<<
-include::insns/vror.adoc[leveloffset=+2]
-<<<
-include::insns/vsha2c.adoc[leveloffset=+2]
-<<<
-include::insns/vsha2ms.adoc[leveloffset=+2]
-<<<
-include::insns/vsm3c.adoc[leveloffset=+2]
-<<<
-include::insns/vsm3me.adoc[leveloffset=+2]
+include::insns/vclmul-32e.adoc[leveloffset=+2]
 <<<
-include::insns/vsm4k.adoc[leveloffset=+2]
+include::insns/vclmulh-32e.adoc[leveloffset=+2]
 <<<
-include::insns/vsm4r.adoc[leveloffset=+2]
+include::insns/vghsh-vs.adoc[leveloffset=+2]
 <<<
-include::insns/vwsll.adoc[leveloffset=+2]
+include::insns/vgmul-vs.adoc[leveloffset=+2]
 <<<
 
 [[bibliography]]
@@ -245,8 +154,6 @@ bibliography::../riscv-crypto-spec.bib[ieee]
 
 [[Encodings]]
 == Encodings
-include::./riscv-crypto-vector-inst-table.adoc[]
-include::./riscv-crypto-vector-inst-table-zvbb-zvbc.adoc[]
+include::./riscv-crypto-vector-extra-inst-table.adoc[]
 
 
-include::./riscv-crypto-vector-appx-sail.adoc[]

From 4ae2021a989ddc90fda47fd4db721c3d850ea322 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Mon, 28 Aug 2023 01:25:06 -0700
Subject: [PATCH 14/26] renaming vclmul/vclmulh 32e spec files

---
 doc/vector-extra/insns/{vclmul.adoc => vclmul-32e.adoc}   | 0
 doc/vector-extra/insns/{vclmulh.adoc => vclmulh-32e.adoc} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename doc/vector-extra/insns/{vclmul.adoc => vclmul-32e.adoc} (100%)
 rename doc/vector-extra/insns/{vclmulh.adoc => vclmulh-32e.adoc} (100%)

diff --git a/doc/vector-extra/insns/vclmul.adoc b/doc/vector-extra/insns/vclmul-32e.adoc
similarity index 100%
rename from doc/vector-extra/insns/vclmul.adoc
rename to doc/vector-extra/insns/vclmul-32e.adoc
diff --git a/doc/vector-extra/insns/vclmulh.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc
similarity index 100%
rename from doc/vector-extra/insns/vclmulh.adoc
rename to doc/vector-extra/insns/vclmulh-32e.adoc

From 0083833fa1bb44283157d9c4eff3a8c4f6c90137 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nibrunie@gmail.com>
Date: Thu, 31 Aug 2023 16:57:15 +0200
Subject: [PATCH 15/26] fixing vector-extra build issues

---
 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc   |  2 +-
 .../riscv-crypto-vector-extra-introduction.adoc        | 10 ++++++++++
 ...32e.adoc => riscv-crypto-vector-extra-zvbc32e.adoc} |  0
 ...zvkgs.adoc => riscv-crypto-vector-extra-zvkgs.adoc} |  0
 4 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
 rename doc/vector-extra/{riscv-crypto-vector-zvbc32e.adoc => riscv-crypto-vector-extra-zvbc32e.adoc} (100%)
 rename doc/vector-extra/{riscv-crypto-vector-zvkgs.adoc => riscv-crypto-vector-extra-zvkgs.adoc} (100%)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index f3adb3df..2dae82a0 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -127,7 +127,7 @@ progress.
 
 include::./riscv-crypto-vector-extra-zvbc32e.adoc[]
 <<<
-include::./riscv-crypto-vector-zvkgs.adoc[]
+include::./riscv-crypto-vector-extra-zvkgs.adoc[]
 <<<
 
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
new file mode 100644
index 00000000..fd7590b0
--- /dev/null
+++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
@@ -0,0 +1,10 @@
+[[crypto_vector_introduction]]
+== Introduction
+
+This document describes the proposed _vector_ _extra_ cryptography
+extensions for RISC-V.
+These extensions extend the _vector_ cryptography extensions for RISC-V,
+providing extra features that are not mandatory for a high-performance implementation but which
+can help further improve the efficiency of the algorithms that use them.
+All instructions proposed here are based on the Vector registers.
+
diff --git a/doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
similarity index 100%
rename from doc/vector-extra/riscv-crypto-vector-zvbc32e.adoc
rename to doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
diff --git a/doc/vector-extra/riscv-crypto-vector-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
similarity index 100%
rename from doc/vector-extra/riscv-crypto-vector-zvkgs.adoc
rename to doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc

From 21479fda3b03bb9e4786ab53972232799caf48c3 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
Date: Thu, 31 Aug 2023 08:09:11 -0700
Subject: [PATCH 16/26] Fixing reserved encoding description for vclmulh

Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
---
 doc/vector-extra/insns/vclmulh-32e.adoc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc
index 6f536542..c90d8b5f 100644
--- a/doc/vector-extra/insns/vclmulh-32e.adoc
+++ b/doc/vector-extra/insns/vclmulh-32e.adoc
@@ -36,8 +36,9 @@ Encoding (Vector-Scalar)::
 ]}
 ....
 Reserved Encodings::
-* `SEW` is any value other than 64 (`Zvbcb`)
-* `SEW` is any value other than 32 or 64 (`Zvbcb`)
+* `SEW` is any value other than 64 (`Zvbc` only)
+* `SEW` is any value other than 32 (`Zvbc32e` only)
+* `SEW` is any value other than 32 or 64 (`Zvbc32e` and `Zvbc`)
 
 Arguments::
 
@@ -95,4 +96,4 @@ function clmulh(x, y, width) = {
 --
 
 Included in::
-<<zvbc>>, <<zvbcb>>, <<zvknc>>, <<zvksc>>
+<<zvbc32e>>

From e96eabc7dea0916823f4d11ba29359c6e1b4800e Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
Date: Thu, 31 Aug 2023 08:11:10 -0700
Subject: [PATCH 17/26] Fixing title page

Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
---
 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 2dae82a0..322450b9 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -1,10 +1,10 @@
 [[riscv-doc-template]]
-= RISC-V Cryptography Extensions Volume II: Vector Instructions
-:description: The vector cryptography extensions for the RISC-V ISA.
+= RISC-V Cryptography Extensions Volume III: Extra Vector Instructions
+:description: The vector extra cryptography extensions for the RISC-V ISA.
 :company: RISC-V.org
-:revdate: 08 August 2023
-:revnumber: v1.0.0
-:revremark: RC2
+:revdate: 31 August 2023
+:revnumber: v0.0.1
+:revremark:
 :url-riscv: http://riscv.org
 :doctype: book
 //:doctype: report

From 8b5dab8c97008de310e86e6a65f9e3f68f4d210d Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Thu, 31 Aug 2023 08:16:31 -0700
Subject: [PATCH 18/26] Fixing Zvbc32e description

---
 .../riscv-crypto-vector-extra-zvbc32e.adoc          | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
index 7bd8e84e..a5b6af26 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
@@ -1,18 +1,23 @@
 [[zvbc,Zvbc]]
-=== `Zvbc` - Vector Carryless Multiplication
+=== `Zvbc32e` - Vector Carryless Multiplication
 
 General purpose carryless multiplication instructions which are commonly used in cryptography
 and hashing (e.g., Elliptic curve cryptography, GHASH, CRC).
 
-These instructions are only defined for `SEW`=64.
+These instructions are only defined for `SEW`=32.
+Zvbc32e can be supported when `ELEN >=32`.
+
+
+Note:: The extension `Zvbc32e` is independent of `Zvbc`, in which the same instructions are defined for `SEW=64`.
+       When `ELEN>=64`, both extensions can be combined so that `vclmul.[vv,vx]` and `vclmulh.[vv,vx]` are defined for both `SEW=32` and `SEW=64`.
 
 [%autowidth]
 [%header,cols="^2,4"]
 |===
 |Mnemonic
 |Instruction
-| vclmul.[vv,vx]     | <<insns-vclmul>>
-| vclmulh.[vv,vx]    | <<insns-vclmulh>>
+| vclmul.[vv,vx]     | <<insns-vclmul-32e>>
+| vclmulh.[vv,vx]    | <<insns-vclmulh-32e>>
 
 |===
 

From 34a114ea18ff699d66db210219ff18f4bb655663 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Thu, 31 Aug 2023 08:18:55 -0700
Subject: [PATCH 19/26] adding vector-extra copy of images/risc-v_logo.png

---
 doc/vector-extra/images/risc-v_logo.png | Bin 0 -> 11962 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 doc/vector-extra/images/risc-v_logo.png

diff --git a/doc/vector-extra/images/risc-v_logo.png b/doc/vector-extra/images/risc-v_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d754746ed2a3bbd7ad9b4aae9081e0e044493ee0
GIT binary patch
literal 11962
zcmch7WmFq$+iq}or?|VjyIaxV5VUA;cPZ`^Dbgavg1ftWk>XlPaVak4(C+=d-*?ve
ze`c-8W7l=-Jeiq0iPlh)M?)q?1^@tPiV8BC007kDYu^$P{`FJX87m0@Kvj4rEv=y_
zElr`}>TLba!3qFSh)&l-0BVim=NW1$+J+!1AazD-7AYfrmc>kBT0&H$_&kt=DRay=
z&eY<G1@)$lso<vpK%e<=1qj=2?*r6$yEqt%md?*uQ}x(++vU~eeERJ6Bj+V;qSFf=
zkV~AWogI4!okY)=G$<clLs$HzR$(0g#XteYyfv~rUVPlrVhgbP`P<9KkJLcAb*JF_
z!~Rnco@j{16apYROhB4qe_zrc6VMUPo{k5QxC-$-%7W9R&4aEDL8U{i)sC*8Z;^@S
zo^R2#N+p~Q@#Y6u5+~A+Lrb(19qLCsv5XW3;DGd^C0L*&-lMuum=54bkRV-@c61Oa
zp`HXw59x{M2klM}{`h`T7g^hepPnQ===zS$dGTdA;f2+wHRdy+CwNAtZp7Pavc{R;
z&02<$6K%&tQBH+r-~qFn07d%-vrs#kic8TkG5d0lWQn^cd`YJVB|dZscL%KKq|zXl
zpUjB9FNLayXh6fZMQ7(vfU;+fL_9^j9Cx=wnZvkS!`p=>lEqZ@^>+gmeRSn^HzT&|
z`@2otSZu9%L5q1Q7gyAQ6-@3uL8P~KZzjF~2cn7Uk{Nf>?^h*`>C<GwOYdvnW!0T~
z$9J-*i)phr0cm~=*@AUK7&tZi>pxCw?K#7`)NAyq5$|A)(aT~iP+>+!me~&rg{RP{
zb^<dRC}>P}AC<D-l++HF^{pWmZy<xTe9S|K2ryWJQ8XtLhZRtEuorxGh?YApTn_!u
zsW8z5C~#|np%YL!U@T!={G7GepNjL45JV_o^d%6Dc(N%IaTnrXO>iVQ?_j7XlQ145
zO06S4VIGQO*e~c2B$Z{eYWsW-9>&}cVMsy=V%9cE*|r_s!kUkI!OF#On~abayj3TC
zV^_dSJc!AzY%`KVD2GZbhut5Y(1Ju3^|(8GPK^3q_%!Sh-Rdzzber>Y>Ul6Yenz&1
z*SJIBMpcxB1IoP@aXUMkyQ_+*ISi8LjYtN#r{td+Pz^>A!<Pf?N^&&O&%5$uF;~2N
zcy#9VU+{=09ZV0^l+y@hMBnta6MpB1UeDn7;PHOG)f)9Som=FWqyLm%E`E>SeDjAT
z&@sRhUpP8<IKfOcID&LIyO!^`6dR+uK)zgu2ba|(T^;>9)(bV6I*!(|Vyk_pec}=Q
zwNXfo!hIlY4$)VAE|(9b!Gn>`ft0b#+lgd@u3jEqFcSBqZ@a&kaO-R$g!^o>8~;WE
z_>X_$I$|fCCJydTgfi=RR<?OY79(Q@2(0w#jUX=w8Tn8Gm}WxM(>OU07JHx;chNh7
z#S!7n%|xQ$qABoH!i@2IuUTNYdr4|A(?VTqpdoNNW;jd8o}r><;<nK8J&NbdEr>!r
zyx=!gc~0jfS0T-207*=2DS5&uPEg1a)l`)5P8ckexdIP%ViDDW0zQ=_?g+_u;UW_K
z2yIHkB%$v%-#J{d1ryz=nv)(CFfNhrqd*L3yb_Zd!tSVtVRf?%JXpCg)}QZfISoSD
zq?u<8-1%3c@?_WxzRj>*39ckUsi0VZK4hZ&2y>|cvY?F*ST2b`P%C=uYT+Spzo+$T
z*dPRt!8k7PQ+>u;$O!|fV6nW!>HdrzojbJ-Eb(~L-m~3+I|ArjGbJ$H9ac-YVmM2m
ziWKAL(3)Iu6c}Dibv?XUhALUHf!?>jgfl%;HAmG*X-#5HSw!-fMvUerRe{DLg_|)L
zS9vEHS)sV-x@f&<w`jb`>8oEI{&MKC8ajPRGVOQS@5q;Imz<YYmpGRI*$j~KMd5%Z
zY?^-BUYcVX{ur$izg2L#d}9f`22R=Xe8jwx9n3ZAHTreqhYugpKU6Quu+}a@S1MHo
z80<Hk+6R0(n9H#xS;5T?ZHj9O^9=Qjy~$Q9S)C0#cy|T;z~xK&jQdRX+{q{H*TAP|
z_({~X-Jt#LW8Do3X-q;Tx}i<BTCG~d6jZLVP-h#iv9NIho4Qw?Q{$=m`kS_AUsO^4
zPGF}<XR&Yl)zp#b0{y}2!TST0E9I-^!~Cy7BRzzRsOVwScL}xeE|Ig25<E5hTf~-c
z(%u|%+VcP8p64gA&vXD<0v)~QYht50Pcn_H+1v!(#D59>+C{9#I>ze5QkLV1YZ2MW
zomD6_qyGFhYu2RDUXDCof5?1DaELknW87Kx7M&U$j;eLG1COA^SK+T_MK21!N9vLv
z<$pWHWD8{teVpQ3w_CAZakh5CH>cw@boLyBe9L)9=8)&uHT0C}lL*&OH6!IvJG9d>
z$o+vbc5=*0r%tW@fkl;-nicPZA<HT&0_3<d1%j`C(%5FN+IUkt)nH<=Znth@Vb0xa
zouJw|dDet$NN$*XBD7lHbas4vqIn!xV_)ZI=WIFM$lg@l+`TK|h3GosRqD}lsNjC(
zGU2kewX_{B<RcWFQ=ZEvSU6=e1=>fO$}L1J^p{tZTS#czmU1(1$8DQ$Un8F+Ar<wV
z{rP2<eP}68@lCz!#4@L;k*KkoadYd2=bYD@v!;>C!ISj%RN>9emFIGVb?P;XF8TW5
z@!Bbi2n+s%&V$kOoUQvXn~HrKc4zi1y)eDR=6J6*ulQRua+rL9{9>_vf21zTu3rI(
zcY)`>E}73)&u&Y)V}lZ2*j`F@kB5<l9pBt?^e_bz)X;unPQuG1YZ0tr9^r~2(0+48
zu_Oe<>=3;}T20QK+?Wi++99Sw5yZVr*d5Lq7sQH*kPH71(e(b{J#B<e#5zhd@-%Wa
z##!unTqdatRSG5%UMtrn{I|IA{Y96@A-}|Y;ot7KtW1>UOox&US`931R?tG`PvHwV
zrvpnv3ZQI|Bn!W%$ML$rW6M*EDIGTbkSZkaPGEyIQ0-|MR<7?V+!)CjM;beWenZVw
zL6fGD?ubTRu}V2G13SBpso4H&rlK3Yu?nvuUI|}G^4FBPrjl!Q^p72$uD7_;jKx}2
z>W%pZJl9V=sqb3n64btBO??cc$W6*i>0*AyVgH~vel)#hJ#Tq+A^7C+v>$5LODvhl
zA<?3AN5f6wrt@PC5)4_;7cHQ88&Rq^fu$F1YAtW=JQkRN^Rb@VhJ&Bh%b84&UG%I*
z#x#XmP_*A)ZZ=M>tiC~eCH*X;o3C3PpLUf^{=3%RBpf`rf8z^S!@s)4ln@9sz6-Rz
zoPd$_l}#yV(pRhhFyX67V)8WS;LjK2c|<ZUh_b(yKc3faMaIQ$VRGbubWi)Moj`_V
zsWqvM!)Dt#)`+0e;Oa+v+L?jUgc5$5$dlO4<Adgjrv5=Z`BTtR1EDF4$y(Qd#pnaA
z=@_gP!I<$l^O!j69((6`;c8;KI6B+7(PGz{{rwQ$0A32-H$C{uwN@o(os07K74#4%
zqkaRerXQ0l2lmr?ag|FA32kb%RTW%qbv~Vw4|qr7I??4-I`6g5Dmg3lHGesUnf33*
z_r7_<m*vQ`dF4{?{cDmH%aGXq#E!WMyBXgppo7(CbvfTZuzc02uG9Z<pEQNkSa{bf
zuj*G-#i~`Cy^p9tb?Mo%sFl}n-i+uZ8XKD74cG05Gnhx*$Gqp9-@mS(-#!{*+>zJ{
z*!tD{z}(f)r~WEXQCqDzKfjQ(vgYIcw7rP`@<F;g?zh!2);RToJX#`~xSXi2tHZhX
z#l>%&uQ;9b^=e`cxf{NZ^F>;E>gpovfpmuxt)~4!q8=XtZthxx-Q=epbH)98_oe1!
zt7GFG^Bms?{(3smXxH=Gr9D%gB<&()&D|ABG)8apzf}5}Zd&bHZPJ17R{6{K>-9(e
zFKa*B7+dbIbWbZYx(R-Zf&F?(`*Q2r+q{@hvo<Q8Gj_XgJbgS!JIQ^!PbB-oRx$1|
z^2JlcALflOGk>@2L=UF6m7J<4UWMPfi+p^7JhoolOE@2p`v%Q-fW2MLr?<pcALy<<
zy3;yI{2Q+tFOEo^hq~UKBp!9&@|~UZwAbAZop}YG--B-~#)j!(xW$vjbOJq})-Gue
z$8&o-dL<dvhqN5wI0R-UQ~)L?fZ#mjTXa%@27Se}50m7)jTRpC3sS{&DANVBnLGzT
z5d{zx>6aD{UXoG?^V-uD<s|$>M%FJ)eW`-s#Y=$p`jB*nfMEI&O2Q6mtKI<&HtXNo
z+IQqwHU#91=;gDgONHBnO(rPKZ^*%Ij&(yd9t0XVFUj4#zFHuG74+Qz0DRg%CzPTl
z&H3w_NA5dqpgT}iMaaV0k<HxF8Dz!g?FfEF0{|l4La$v%D|d4WZ$}3wHz99Ps=o-K
z*Z!Yrb}EX$5O;e~Dxj(cg|xG)6$LLF4;u%S7%~L~g@~)AwUDNa+`shKktmg|yE|Bj
zo!!gJi_MFh&Dqt4ol{Uyke!2zor{b06~XG}<K%Ac&FbVv{g06U$dR#fvv7R}c7NyW
zMDa(iImp?=U6hLIkD~t`|JZ5e{qBD>Il2Ao)~kc;e^%Hz**Mt$EBloy@@G~^<DIva
z15oCjqm`4}s}3<9eh!hp`2T0+e>DCtrvCphIXV6>=Kosxmnp*j$ASOl&_B`oJNp_g
zF=P?;|At-+`7F7T=rtQ8?_|`qUt8!usd+uPUQdR9+t)s3`moePz-w!+C?l!u4Rvgc
z;*KkeACQ_gp+Zp#3x_RXjENXUb(tI#EHh7Shv^d5UU1CBP<j+2Pg?+=u#-p;(-N~^
zT2oPDqoeV}qVRpcO8M>}W=vk&Nb+eLVZ??jb=jA6oSQ~;uG*&Drj5ILYBWpbwxV3G
z+3vx!)AxD|4<AW@6FLq+-Gj&h9T$uZYor2E0)Lkxgpv!!GJ_?ALW82yB}4o#KngY!
zh!;94`(KU<L<Ii7;0Q#HS|-@kSkNo+FFzCLgZe7>AMwaoW*{9zI)cGJs{T&ZUK0M%
z@fV6q)iY3zslpcXSM8rE&>rVstNuV4WP+o0C^L<4|1tD+YGBOb9}8ch8c_h83Mr0O
zhQE?t1@?#y5DOt+zi*QK`JQ#hE%t5=9#iDFl+5*hMQYLZGe7-5z>o&nSL8|yN*^U+
zKs|b>sR+jBYG%NIS35nOZu>3f(eU4>Tq7=J0I63^9!cj<452;Rf5@6NuD_^2nvSq_
zqyCs1QbfIl3VMe7aGtFG$@&*>aK5LC3dHjmZ!!As@^`8&22lD8tXMW$Z)faxy;c7<
z|C0gYBi+@X{W1RrgN7VR9-FyOyZI5zoS02r48&>ooyZ;)e24$n<#(aM7SVW*^yj@Q
zh=IRz9NS`|7x;{O|6wO_!IG##q72L)Co!Kc2h*Sk^*M3uX^5Z1|8ftagLk9xq;xcl
zIbr)6H@|csWC67{;2{5`g;Boeet?MrHQ-uk-YF}^mqSoOX6!K-?{8)(77zziA)*gz
z>Iz=@(=dXS^^M(5#%|_IpZp1h1jr4n9(S3VJ?CVwG<Hr}F3ZOTOnS?czPtX-DVPnK
zky=-Dq{9Tm=(!y_Q=|;jpy}!$TBcK%U5awCLyP1(kjsf?75C}aw%t<WqXbv>hZLcQ
z+miywM)@JY!S9S)(?=TK6DDI^Hl-NxL>!EW?tcv<JMY5f(^~mi+9YAmSD2{(bp3Lx
zMOXiWnfA-Br+HlkT}JM#HHSXmLg;pO7^c_*qduCRrylrTjcVMG(UuvMj^-F3rJVV)
zB||uK#^V!xrQhj<8;)^CH)=qc<^ZZ(yF@bL-DB6~8RWC_#Kbu5OSX}?$PAn*^n2fO
zy;6TmVdQ_`U-U%o;7cfyhnfP(!%UGFY$IJU-COMRrJDS)(lA=M9m%-Q=!~)RercO<
z^WZ#Vcu?Kg`9WU}uEFWnzxD*orbEKzHxWEB_a%8xixF6VK{aDS!k}bHhQW&bf<_@`
zQM^~}6d7}k)~%W*jly`TG8<*0;s5ppsIR(SPg^^NKanR$Hk3%p&`lf&e=CX+^0MK?
zV3Yp33v@>K(hM2kU*Zlj26&~F;A;!uwyTx-`*zhU>+liZq*tW?FCr|Xdz|kp(!qBM
z@$m*(nM}Ia%<~AJ9!@@ER?!($tp(b?)kfNC+!#wjwEN6%8o6@9pB#Ih=fdu-)+to}
zG~9mEA3v8L@!aQ{_KnPR(eIru?9-$!WE~0t{Wsvgua2VxLi3rxyDYde8ZX`da@PKn
znJq?{^RfCirO~xq^AZ>ShUyKmz0d^ky3Kw{l$PTL<mQ~Ai^F^~h_>^7H3v}=<8kPl
zHYT&!8W}?n+=uG5wU#^Ui*-#99%4k7ue5-(8lA=70S{S+ji}TI^)|Cox#KHB7EfP)
z>2hM#9)0RTN!Cx!R#KXsc9%Gq_}ImBu^#!P0;#!LUC>}C+lzuSq|}O&Xx{O&h;OEC
zq^_&^640&I{csvDa|ks03sNE$I`OtwmvknaMjt!nG6ZfC(g$M1$G+;1CscB%cF`(9
z><1h^H#oej52UQk2D+3kJlR78oKk;-cG!VvM8nq&dkmGU_n7l1K5>wbK_kjZ$!-Z%
zB3%u5@vVAqOxe4^vDPPc#~D=LdiG@CGV&KY__Ufu4rNzynEAe6DkJnkgojgCaF<*N
zV8#7D&{P;iMeFbu)Q-2i_Hi(DrvIq1#;g6pT?RU%k%AOkjc1>*@Vb7d3j<!5r5rXE
z$~0al>&QQg?}m$QkvDN2K5MT_r1E7q%?_w4p571ySHH}?l_lt|k*tp#abX(eHiT4q
zj!cFa`w^@BtV^_BtH;&tXoB0Rjge*AMg+-5@K>>g;jA{q%p2dn%!KHD_B_&F_tQ1C
z1T_+Gf7#Ot0_b8^JSFojFa=bNAbiNe-(`VTbyM?FdVO3MYbd+C7=!a%W2M8JPO5)=
z5P?5U$Vk~sr3AL`37M}1#_Q@t64c3c!c;*zN-qa#_lSL}^nFVB4AZ;y1)XL=uDg}f
zc2mi>)7b!9zOJhnACB@Zcc8!-#@lh;wrxUCfY}Xf%lE!Oxs#CdX>#f72RF1m2qw5$
zn7|(<h2QW)*g#B^K$*&YVRUi59VniMbFjR*bu$%%!A^KCUK=aJ5{o%JL2AJeW&nv+
zJh@H!BjP9eO0aF6bh&tgVrIi|vd-wSFOKDpO?)k~UDi+anuAAZUySc3(suDl(FoWK
z(7X^!Mk)p_vFRqlHu}<!&SLdFAs-Vcb=`G)m!sbze+xugsCXofF6&M_-6-UvTUb>?
z$G(nt(T#=~2-I^Ih)UPSIm&5e=E2wkdVJ{@y$>#E;3ZYWb5SI&jcJ=Wjr){@Q|zoW
zqFqhOIJL4N%@p&X8BDYMay(V@{UDN6&UboZ$f6~vHy149T^!WG`+IJVL>kgLEdcjH
zRNk$-%3wKpUXUrmA0`jRtDP!cmiR+4zXB>ZuNG9{`wS(}pd+O$!QczuYEIjy3u#8(
z)tRT&wcG2%#!Xv?!#&0C29po78ZW=(8>kR}Xd6gIE5hAAt2(52gc(Jh9m8#%ax7|e
zT^wtPal1suyzT&tL61d+3cX$ZDsx&P=mg3|Pshaf9?lz;bKzey%6~5_XG#Y;`1Tht
z=V?{Jo@Lz{YWrS+_9&v+lEu6@f?=zYWt5qsb%bPODwuPtw4!-)ZSF?t79Yqr!?NQo
zM*3%R{V4AqL}fqdvDtnNcO1H~TVb8<Sv?Ymy+RxUa9?IIKFU<pj=8XOzY$fB;{c=9
z=DCBImuku<N=07_1Uj9AKCYXomN<B>jmN0^)En4_r4tcmDh(zR><H0s_1D{fAe}8M
z#|=1m1jR>ng!ST=j*iHh(AFm8YN&a3NS;Z@se~sRr(iuK#^`bFeeY+=6Pk^dksDIf
zd)n!(^jwX{T<Olq;a&WI=St~<3@?@g*Q;LWcf-fFIH5~<(3se3MaG_gb;PZO&CI5r
z!wvd^PXfCCdcbk95>ex`T7vZPj8qyzhzIjVO;M2>u6->@rv}-m@{-YxG2=6~{deLp
zS6UVp4d0({E0UaxeJ`1!@l<8g!i(Budad1*j6Kt%zm1~Upx#OKSrVI0gTQd8bpWa*
zXj)!aeV{Uy4iiq*gONiu5fmnR%Gh;|2vkQWIo+rdrQ!$M-=xfa_@}se-S?myrd|GB
zw;weWK%*}b@~^vAc^LD^N4B%^t`{3pn%@Tt$qFq*E^SX<F4P`ak2~QhtZMo~%j*Ri
z#8<BF{8H)cE87*p*9r3}23VF^h@PAm=_^#+&s$Gt=1-*0V2y9R8(sqQ!Fm<gYy&la
z;-8d*mQfGZXU-a(FS9)TKt;0K{)JTLV$W%(QQg+QOu$XH-q~`^g+`Y~J5D`*v6EI^
z{W;xzcd(Chz(CiR=*e@9qTe$zF_3N}IFe|LmMLl08uJd}g?bi*J*FNoQsE7wtS}49
z^)ah)Xk{)N85GYOrUt+sw&WEy9CXA=oc`!OAwA6*-&j}x!Y_@0FB-xc%0=7PjXB|G
zP5qkv;LI;I8C38n%cI-3`pfCu{quXLxQRrIi@@#C#9nfaZBJqvJ;$a8Wg|OOfiPWc
z!PNK(=bNC{h^_FShTfdS^%L*3L07Q@vqhFc7MJiYGH(r)D>#`%NTcvLKV_Q~!-SGK
zl~F7zs7s6HW<T<BFT&@PRbeWNIy~qIGfA0t!!@R-jzsthnZj($jgMSjpe&+%B}cbK
z{z9V{t3_D>4#QkXL{MRbv6?9)f95}5j{dQ!yCWdZym(G339%MwfIi!V?`1dnv_q1S
zQ@xqGHC61lKB$94sU?J2kwsLouF{M0LoJlbB}T4|E^aoaBw9nWZTggvaFAK+mlM)_
z&@DQrU!<Iwf~78<yRK9Tt=%Pt5i$|IzH@ZZp#{x8B8d0bk3?%T9t6YmV|Ol@SE+I9
ziU-q{uhK;!Kg*u#G9$`3Sl_NV(-U#C%ha-<dD^9Fc_8VX;EY?qSt3x<aL{m#r$nI^
z%SK_ZIxvi`H(~nkvwVqKksG=h@STox(QwT=ccP}L_@-yfDimGGiw8n0fRlW4B%3W<
ztEA_{l=5+L{Z0CSc2?+H0-~&lf{Zb4GREs=*+@HhIUI0=fFcd4saVRgUPz*~H293l
zHN6WeR<`G(R9y#=Br0qO6s5xG9@e<iQO_X}k0Jq%R1@7I%JdnG+u5UNtDyC*bTY~!
ztnn84Z#qj7XYx6+bl}Cdnkbwc296ITWkYn~+sr5+(W9?ka58859WIX{ZEbdztg=7&
zrrh<J!?isaLOrWaLazX3dN6cy;rJRYS`v?`=}sQT0l}Ke50Nf{Zsb*!w55nrwdzZX
zAZ$y@=4v-W$P%^D9CW0%F$6u&&7*9!iQ~_PU=O3}R0?Uqj*Cc5TS#%$Ke~?`-`L;?
zLI{enE3UJPx*~yOy`{NIAXS<z$;0G=cfqUJ9Hp|gOSc5-1wuK+zgC#0sH72O!9=p7
ztH0R5&E%ww4O=h2(L-)ut3<V!EYovVO7oXHts#;(oJ(SiK5oQK-)Kgavx;!uj(86E
zT34qw*6UrIEUO>~)}p>%R(5nK`h0~Rrso&|5iI#El5~skMbzrmQIVamcNL*h?mF8C
zfjyzAhW)hk%ndyyN?oOHLAVm{E66&Zs*Jqmt<l9z7Diq|#-R3~Hp#ZYZM>WoATg}p
zV!L$IZ&Bpjy)B2yiCNGP)4n9Z(d=%^pHT40osg@v-6xoX0hVpb5Nl{AccG?nDBs?M
zJ=Tq*mJi^-V<^kGFp%xEWHCTY15_#i?S6k=z$J;Z)O_6wyNJU$2MW#WeFaennY)rd
z($ZXv)O%(k)Ktf!uD={jzwfenj*JW2Jd4O#T%M_wMi*4)krY`X!4PPsdb<H#S(>3q
zcMT=SI9|pzUkNOa#=D5Isaz-%DBSQC*@VWz8y%UA5@IV0*teVj9poW{WCNgAog=q0
z*oQPvGX1PUzsOQKca+!jhGlR|-&0i${Qf5HKx<YLP(o)lkX<h7XFZ8xEi{(P5APq4
z&yxz*CGHNp^4;>|7)~|EH59({z2~$I-BTvsK+GL(Mw<QSug3}LOwSB9o0`K`ZO-+I
zc(nyEsXs<!)=oh`c!)Y(tofX3+jj-duYI(u8HSA>#b0X<FE81-d#}G=W32osHk{=8
z_`jAKhBs-r1qF1?Jd!^_yeUH;-omouQoYrsMUG*)(>KF{6~sktsqkM*++w15U*{TC
z09_CaF)FB_g2jYW#l+Q?k-1QaB8Y|_wWP~h_AN|RoOZwMI6M{fy*$i6DEHM_e?GAc
z)Y7Ro5q_cy37PMj^Omg;<bE$<;v_Gt58>{*#OUelBoD4$xzrM@Gm!K=wFcPWxNf2l
z5cJ)XuJq96PSZ4VS^yh<p%9AfC~_MwH95A|yKdb_6mDUt)+^qEx6FvvmYm^7J<lGw
zdk$2nmw)><Aj7Y7lHZUs&@PvRzRiD{;;_;Tnl$|U+xX_xgHt6}M4~=lySj9Pm)K5u
zQQz|;5y2GvK4XC5vk!(C(uO)Y<#NGW>yrDVJU8m)t}gTI)Ar9d?F-kj4T*F06z#*F
zV9Vc%t3&X-=<UcAXW!I&pVi2NS9Upja~7`g6zl`BTF(L=mA4i{>bIADsDZ%I`8cAn
z7^Xa0-M*NB!WOmMR{X`THk^7iDpnsm8uwP<GXjkcP}tPH?aqa#P4Zl`eN+aS`Of}R
zdCi+U=5F2ghA1xFw&RXlz*jD+!z*Ney!PHsR?}&jxBEi-`CCFqY_eIT+)cQ;ri1Ud
z0?(WQ8!?$2Ub%wsMki;~x5j%n62B|CKNVdevY2qBM5L>zZGf-N?agN#^^)bwd$|WR
zfnoc=UrL-40f(v#tjrkr#mD=CHv2u<dt>?)Ta!C4k|u#yvl>Ubljge>7_c9CTc*R2
zD?Xh(9*~01X7YRxgD9#z;kJa@DLR1sXB)n(TNF_#B+G&!vrGaxxW9^;YE^*mQIFAI
z2<2a_Kyfh`9SA={yk1lQL6x;^1r18VpQ$a4?}>w{MPsZ=eRqoRYODiXK1rNq_?JfT
z%Fic<@Y*OFv8!fVN}Ajza9GQz+vd*o%|r}H1H%X+q1#=?O&A`wC`g2VpyQXU?WZ9;
zO6R{3uGrAK(@ZW;W-#i609KmqD|K?e?nUG4h8r655ZZNb%IvbpRS4J>tIaL?-*S0-
zCKg_^Lb!y*Q4Z*DX;$g$KU?+1@C!c9O3rYfg=Vf*cV8No;$w_@eSglpBL9iIf7{*(
zmoVfwy1%5+f-tnvUNj{}BoARwIT=Kxep^=5VC0L-5AI2a!WukU%yvy{&G$yOe?ICX
z7tah7J&c?dV=psJcOQPkAH>qA>+jKw&T&?|s&iI2jE0e#Cn&ZbjpJK!lYAJ_mA?VE
zdBM)wK6!=Ht`y+49qqzISQ74Hmze_2#rtMh_{D%<+vym3OAvbj7wxNo5wyM@glc)E
zcSO_NqxGx#vYVIWu$y}0@y7Eodgy)+4k$UuaYC-FH+E7Wh*;o-<x6IC+3_QC%jYxq
zQV6P`I&EPMoi8iOw|oS&e0E;nenwN0&j#t(v=Ck6ppFOqx?{&ozSvWZ`m-?o@(j<H
zuyIBEDb>M5u^jX-LZ8d>;!sqv&QCu=Z?GeR?jQB*s*Et+O|s34mUwOQ%<dZ*f7Gzb
z2IH>7CeYbL#5e?6meo+j{+J~0Z_7B1NL6#NBaGiPru*^IEIqmX?Q_$yNB{E(kBaj$
z3|D)&T;A+T0k_9Xw>Xra*l#)WvA*)2w;8<$v(c$~^Lva4x@tIeHJVW`*4(lJAXek9
z_<0HY1L+?=hR@V7?%NKyR20H(PyMY)3y#vI0ZYl<<+$a1#%g46!mAtTM5N@%M}r&s
z;}Li+pDg-g1}!MLwVbeCYduh$o83_7y#w5EA{(wKP)3s@CAsxmXqnyTm56@$i-<HH
zaD^y&VSSbP^LvG)pWMdnwka$IZ6hOiiBB{{r#RO9S1mq6=Tftp%c>tJx6P5JV%Hk0
z?>7+}r62p+4#9zx4c=T=YAy`+4+Q^2BbNS#H1ji?D)?%M$L}L>PzHFh;Z}VJ_(iT~
z8Z-;qW!{84^>U_r`8s<j-z0K$nL&Ayn33{vn*mS0!p<;QAK1vbli`^EyI#TGx!1%O
zv81@QJ`%P*Qd}RnH}f_(@k;I^mv|9a$$NCJkQhRpx;7Ub-{@z^-a|@Ilr{@Zymu0X
zDRUL#((Q**hQc!@5L6Bfe526R9BV!ijt3P-;)%8<ddaTI0%mRW{YF^EXC0SYf)N&n
zo;qT@<;zr6vGY)F05aDp#Af2&8HMZz5FhHlL`CT2t3MlLVO8YS6mZWQ<KQox8Qf+L
z>JX?;FvHYB4crjgx#n>~b4@*G^mW*|vE3z-YNWlh0;}?3hRcul4DE5^>3#<dXe~W1
zyVq9!h|aX|;1QMghKl7idLQf0b_I}AH>(`zE0?9yPVf`m7p?4+!>M~ZLF@**Oc%{o
z@-3mD*bC^Kbi~1=E^32oyvn`6Gi3SK&%1v1PP<4e7d8%v-eTDR-c8H^%7MVS<BI0s
z!r6Brv1Ni+BHlM@G^T{UP=)+prZ1T&%)ZFg{((kj>pJje*HhaXes@7!s^1+VGT}vL
ziSyN*n(d6SM9ycF+iqAOZQh4W2<w~}F<!IH#6RI^N{_VngCleh)H<0+D$()D*5sof
zoePmFl8KNza@caG03>LJwF~7NCl}^C&>%mRy`k|DtdqkWF<ID-vF3pr;xqQyDa=y!
z=>yuP6Bdf)m05-#eK8s{(%uWPyL%!L0c2QpHT-vhvt*>%P<gX1Tmq};v|b^zmK<%^
za{6DDSs=ddQXV7qx54_FhTw@~A4_wzT0LPYNmhI)Rxn=(Q+bPco45THqB56#<@UEo
z@zP7zX%s}q1h6$Xw>{Pd|Na8fy#GC_w^&w@NttH0&jHuAdmENrm#<o}Zm|N_Ot=z;
zqA4zcxh(P>Mkuu&NpDJO2}$`tPWNU&=2|!?Gw`cUa1iRew&4-V0MrnBzHRjDCi$$<
zc}z^w6vropd|f3{rv!{o=t<K{B^K-iU|pYK<}UhP`2r8+wV!W1#rYoiKaM<Bt+4DM
z#u7rOeYbTlN94I+9po|Fwc|Glm+N@%;4I(Yg2dd{TdLCXgDLfCxGT84xf(|cM~7>&
z!Ev_p(XLir-MjJa+|}}t`RrF#8aI4h>@Pypb`&9IFh*hA*c!nRyb3dI>O>`VkxyX*
z34Kg&<TD@itaviT8(%DIV=HQQR4sT&jf9gGyU3fPKRq@_(*<T(mp5e@<4gOFi!2B=
z_L0gWY~3VZH3IN;d1F?rE>=SO`^ukD<L0;CL@rioVDwcl<g3geR*D@1o#f4BrxpgK
z2LtE%S!MH<4-_vRT7ko9%~vUC9{677gB1hMl`YdcbfZ&`71!~P#o$d`UL9?c_eV}X
zxaX<aKww#slqc7!aA@V}n;;xieCO8BbojbVjM=$wYi*-;?9%W~6bGi5FeS@sD!kDW
zx=fL-ku_P#YBmDsWq_PiIC9o~9(q~d?`r^dUB--aMN^BE9YOW&u?mXsvOKJ2(ZNMA
z)Vf&%eN58Kmdy2oFovV2f3F)pKg*$L)amt^frYBRnCY>x%ET5M-t-+PYVyi7O!L85
zNp<r$#<<#ugHeRjljok>dhhKj73}KZ@zWc+tu7rqU-LmeZ_7x;C-y8LkTzt2o4*bY
zq3~Kmima|?5yo*R>97X(1h?gfknd9g{Pv8cjX4~aJmK!|hl-%68wHOeD;pFK;d)7{
zzHq$dEkp3Rqkv{!AMY^ql*;d2)8vb`6?NEeS%o7vceMVoQR#=3Wwgym#C0|BiX%b9
z7i7f=c|hP^zPvXI?m}5ALQ>#Jc(cy<?k~B0-mJS>yJs4|tjY+To=R4I`hx9vz6AXN
zDD(js{DeX@T3T%&Rvo4KcKO)=Urt`p<Om@Qp#_vy>aZ3J|FZ`j$-@tMJDdnolnUtg
z?n5K^BuW4;eBA^2&$bfJP=&TK6I<N*OdF+I2D)kv=Iq?<D9m3!#88vmcw1V)hx0aD
z@oo3T%nv2Bp`Vf{-Odm4g#<sZJ|40|gg-_l89JwKdM7@M@i^kKyqtN8@T38OXy#fx
zDL#%wd_@ku=H@ySbT;S4T|Y<36A=|!ZepOAG*!*CB!O1DhzKt|fw2f3ik5SsvS>Tp
zjNy2SZ!U0$lHL<OXfRuubO62@UVkl5VAy<^5^Hfg=EmCNf@6VjH8eQ-#Wrspi))lS
zV(Nxbo`XAem}_h7lA~FMqm)Lts~>%kqL*V$cI+1kq-UqXG=pH0y;4YM%P1fUU+eb+
zOYeQlaObrX5FiLt@51p!BlJ3f0GkHKGOtc7kBw*Up2zzjs<GUXtB5!h8~HSQyr-*Q
zZ(AfCDtkT1xf%2NV$7i~P#6sC*2`t3x+n^bGfYGSTb))eYx5LkNV`B)0qu)edq54g
zCY4H~PU%Lr4)9$P*%nx!FBZm!$!%ds(Ig5K!$?-BMTGnFr?#d6I_>+!5gujYRk8bA
zEo$7tsPO&bNLJh{(k%MWUk$yAzesj`C>5z8-gETL4)7HjcnZGAvGEh;P>-bXD^Zoa
zpa)9Pj}xw2H9j?N{GtpUy2BT^x|Q}@u5>-HnE)IGl8_>7-2t-^t)+eR4x?{+7jJ_t
zb-#HeEq35!9rg`IoG;~k8@cg~{PS(xl7PekMVIsdo1zB9YQ<7FzkB`TU{?Y|VSm_n
z`uAE1&FDF3gtnV81ATUIvZrBxz-Fe<ij0t`UwNG0V5&SuX<lZhduLnX!4l(Cy#0DT
z+Z9{j^@;LV0-oaO`@aNrXhr5%LRM@4cFg$7U>xN5qYeW>U!T(F0X~&rB#w}I+NAlu
z6$W7#T{cqs6Gjyuqc_O6JxhKW8Y;yOKwvSrmJ`PeT7_Z{k*2YR%CRu7&sY5rtPl77
z_9OamqTW#!QMq!usC}eU-WyJ+zfqv+tl8m~Qs3T-J%uls%l=6U6FCuraZ3_2y1S2M
zuTtY$@<%b$DrQYY4b|k+k#O%-(C+xuWU(x}L&QO1H>e6Hcaa4m1a67><H8I8aY+t%
zp|3)?iV}%F#`^^961tM6=;=ysUer(@>r<i~^!&#Y8Z#hJ3kP8n?~lQMRJg$cfs+@z
zi+=~Nw&A{x@dO0=|D@^_(nA7;uj}^I!uuaQ-0K()ed<-|-)wTo2S+TBh&xUGi<gjm
zy@7{>asNl$1eTdO?{K0a_rG`zOfz$xtu565i2MJp{!Tn&pQ1!a&RF2}bswN8t0q$=
IWfuB>05)+qbN~PV

literal 0
HcmV?d00001


From ea3471aa6ab8d577fec34e369ef6d667e09ce441 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Thu, 31 Aug 2023 08:21:56 -0700
Subject: [PATCH 20/26] vclmul description

---
 doc/vector-extra/insns/vclmul-32e.adoc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc
index e1874bf2..fbd9e886 100644
--- a/doc/vector-extra/insns/vclmul-32e.adoc
+++ b/doc/vector-extra/insns/vclmul-32e.adoc
@@ -69,9 +69,9 @@ significant SEW bits of the carry-less product.
 
 [NOTE]
 ====
-The 64-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
-We do not make these instructions exclusive as the 64-bit carryless multiply is readily derived from the
-instructions in the `zvkg` extension and can have utility in other areas.
+The 32-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
+In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented.
+It can also be used to speed-up CRC evaluation.
 ====
 
 Operation::

From ed8f89e9207e200537374ac9c8e2569fb0ee20e5 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Wed, 17 Jan 2024 16:42:08 -0800
Subject: [PATCH 21/26] fixing a few typos + clarification

---
 doc/vector-extra/insns/vclmul-32e.adoc                  | 8 ++++----
 doc/vector-extra/insns/vclmulh-32e.adoc                 | 8 ++++----
 doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc
index fbd9e886..ee46d468 100644
--- a/doc/vector-extra/insns/vclmul-32e.adoc
+++ b/doc/vector-extra/insns/vclmul-32e.adoc
@@ -49,9 +49,9 @@ Arguments::
 |Direction
 |Definition
 
-| Vs1/Rs1 | input  |  multiplier
-| Vs2 | input  |  multiplicand
-| Vd  | output | carry-less product low
+| `vs1`/`rs1` | input  | multiplier
+| `vs2`       | input  | multiplicand
+| `vd`        | output | lower part of carry-less 
 |===
 
 [NOTE]
@@ -60,7 +60,7 @@ Arguments::
 ====
 
 Description::
-Produces the low half of 128-bit carry-less product.
+Produces the low half of `2*SEW`-bit carry-less product.
 
 Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
 either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc
index c90d8b5f..e10c38c2 100644
--- a/doc/vector-extra/insns/vclmulh-32e.adoc
+++ b/doc/vector-extra/insns/vclmulh-32e.adoc
@@ -49,9 +49,9 @@ Arguments::
 |Direction
 |Definition
 
-| Vs1 | input  | multiplier
-| Vs2 | input  | multiplicand
-| Vd  | output | carry-less product high
+| `vs1`/`rs1` | input  | multiplier
+| `vs2`       | input  | multiplicand
+| `vd`        | output | upper part of carry-less 
 |===
 
 [NOTE]
@@ -60,7 +60,7 @@ Arguments::
 ====
 
 Description::
-Produces the high half of 128-bit carry-less product.
+Produces the high half of `2*SEW`-bit carry-less product.
 
 Each SEW-bit element in the `vs2` vector register is carry-less multiplied by
 either each SEW-bit element in `vs1` (vector-vector), or the SEW-bit value
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
index a5b6af26..6881c8dc 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
@@ -1,4 +1,4 @@
-[[zvbc,Zvbc]]
+[[zvbc32e,Zvbc32e]]
 === `Zvbc32e` - Vector Carryless Multiplication
 
 General purpose carryless multiplication instructions which are commonly used in cryptography
@@ -8,7 +8,7 @@ These instructions are only defined for `SEW`=32.
 Zvbc32e can be supported when `ELEN >=32`.
 
 
-Note:: The extension `Zvbc32e` is independent from `Zvbc` where the same instructions are defined for `SEW=64`.
+Note:: The extension `Zvbc32e` is independent from `Zvbc` which defines the same instructions for `SEW=64`.
        When `ELEN>=64` both extensions can be combined to have `vclmul.v[vx]` and `vclmulh.v[vx]` defined for both `SEW=32` and `SEW=64`.
 
 [%autowidth]

From 1bfa9e36da5c5d23c4f7f18cf6bd4659aec201fb Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nicolas.brunie@sifive.com>
Date: Thu, 1 Feb 2024 09:51:13 -0800
Subject: [PATCH 22/26] [vector-crypto-extra] typo fix and improvements

---
 doc/vector-extra/insns/vclmul-32e.adoc                 |  4 ++--
 doc/vector-extra/insns/vclmulh-32e.adoc                |  2 +-
 .../riscv-crypto-vector-extra-inst-table.adoc          |  2 +-
 .../riscv-crypto-vector-extra-introduction.adoc        |  4 ++--
 .../riscv-crypto-vector-extra-zvbc32e.adoc             |  4 ++--
 doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc  | 10 +++++-----
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc
index ee46d468..1c24c15d 100644
--- a/doc/vector-extra/insns/vclmul-32e.adoc
+++ b/doc/vector-extra/insns/vclmul-32e.adoc
@@ -51,7 +51,7 @@ Arguments::
 
 | `vs1`/`rs1` | input  | multiplier
 | `vs2`       | input  | multiplicand
-| `vd`        | output | lower part of carry-less 
+| `vd`        | output | lower part of carry-less multiply 
 |===
 
 [NOTE]
@@ -70,7 +70,7 @@ significant SEW bits of the carry-less product.
 [NOTE]
 ====
 The 32-bit carryless multiply instructions can be used for implementing GCM in the absence of the `zvkg` extension.
-In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented.
+In particular for implementation with `ELEN=32` where `Zvkg` cannot be implemented. 
 It can also be used to speed-up CRC evaluation.
 ====
 
diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc
index e10c38c2..3800452d 100644
--- a/doc/vector-extra/insns/vclmulh-32e.adoc
+++ b/doc/vector-extra/insns/vclmulh-32e.adoc
@@ -51,7 +51,7 @@ Arguments::
 
 | `vs1`/`rs1` | input  | multiplier
 | `vs2`       | input  | multiplicand
-| `vd`        | output | upper part of carry-less 
+| `vd`        | output | upper part of carry-less multiply 
 |===
 
 [NOTE]
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
index 01c1bd23..d52d3ff5 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
@@ -27,7 +27,7 @@ The new/modified encoding are in bold and underlined.
 |100000  | | | |            | 100000 |V| | vsm3me       | 100000 | | |
 | 100001 | | | |            | 100001 |V| | vsm4k.vi     | 100001 | | |
 | 100010 | | | |            | 100010 |V| | vaesfk1.vi   | 100010 | | |
-| 100011 | | | |            | 100011 | | | __**vghsh.vs**__ | 100011 | | |
+| 100011 | | | |            | 100011 |V| | __**vghsh.vs**__ | 100011 | | |
 | 100100 | | | |            | 100100 | | |              | 100100 | | |
 | 100101 | | | |            | 100101 | | |              | 100101 | | |
 | 100110 | | | |            | 100110 | | |              | 100110 | | |
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
index fd7590b0..c01afa59 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
@@ -3,8 +3,8 @@
 
 This document describes the proposed _vector_ _extra_ cryptography
 extensions for RISC-V.
-Those extensions extends the _vector_ cryptography extensions for RISC-V,
-providing extra feature not mandatory for a high performace implementation but which
+Those extensions extend the _vector_ cryptography extensions for RISC-V,
+providing extra features not mandatory for a high performace implementation but which
 can help further improve the efficiency of the algorithms that use them.
 All instructions proposed here are based on the Vector registers.
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
index 6881c8dc..9cf42177 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvbc32e.adoc
@@ -16,8 +16,8 @@ Note:: The extension `Zvbc32e` is independent from `Zvbc` which defines the same
 |===
 |Mnemonic
 |Instruction
-| vclmul.[vv,vx]     | <<insns-vclmul-32e>>
-| vclmulh.[vv,vx]    | <<insns-vclmulh-32e>>
+| `vclmul.[vv,vx]`     | <<insns-vclmul-32e>>
+| `vclmulh.[vv,vx]`    | <<insns-vclmulh-32e>>
 
 |===
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
index 40787c63..f54683f4 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
@@ -6,11 +6,11 @@
 Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and
 Galois Message Authentication Code (GMAC).
 
-The instructions inherit the same constraints (element group size, data independent execution timing and `vl`/`vstart` multiple constraints).
+The instructions inherit the same constraints as the ones mandated for `Zvkg` instructions: (element group size, data independent execution timing and `vl`/`vstart` multiple constraints).
 
-All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements, in element group parlance `EGS=4`, `EGW=128` and the instructions are only defined for `SEW=32`.
 
-To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing.
+To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing.
 
 The number of element groups to be processed is `vl`/`EGS`.
 `vl` must be set to the number of `SEW=32` elements to be processed and
@@ -25,8 +25,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`.
 |EGW
 |Mnemonic
 |Instruction
-| 32 | 128 | vghsh.vs | <<insns-vghsh-vs>>
-| 32 | 128 | vgmul.vs | <<insns-vgmul-vs>>
+| 32 | 128 | `vghsh.vs` | <<insns-vghsh-vs>>
+| 32 | 128 | `vgmul.vs` | <<insns-vgmul-vs>>
 
 |===
 

From eff2e907976e475db69257708ce38d76130cc6dd Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nibrunie@gmail.com>
Date: Thu, 1 Feb 2024 09:59:24 -0800
Subject: [PATCH 23/26] [vector-extra] updating revnumber to v0.0.3

---
 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 322450b9..040573e8 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -2,8 +2,8 @@
 = RISC-V Cryptography Extensions Volume III: Extra Vector Instructions
 :description: The vector extra cryptography extensions for the RISC-V ISA.
 :company: RISC-V.org
-:revdate: 31 August 2023
-:revnumber: v0.0.1
+:revdate: 1 February 2024
+:revnumber: v0.0.3
 :revremark:
 :url-riscv: http://riscv.org
 :doctype: book

From 4ce6a83830f9241eb45d7258684310ec7e499f09 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nibrunie@gmail.com>
Date: Tue, 6 Feb 2024 20:14:19 -0800
Subject: [PATCH 24/26] [v0.0.4] applying internal review feedback

---
 .../riscv-crypto-spec-vector-extra.adoc       | 33 ++++++++++---------
 .../riscv-crypto-vector-extra-inst-table.adoc |  4 +--
 ...iscv-crypto-vector-extra-introduction.adoc |  6 ++--
 .../riscv-crypto-vector-extra-zvkgs.adoc      | 18 ++++++----
 4 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 040573e8..3dfd4ce5 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -1,9 +1,9 @@
 [[riscv-doc-template]]
-= RISC-V Cryptography Extensions Volume III: Extra Vector Instructions
-:description: The vector extra cryptography extensions for the RISC-V ISA.
+= RISC-V Cryptography Extensions Volume III: Additional Vector Instructions
+:description: The addtional vector cryptography extensions for the RISC-V ISA.
 :company: RISC-V.org
-:revdate: 1 February 2024
-:revnumber: v0.0.3
+:revdate: 6 February 2024
+:revnumber: v0.0.4
 :revremark:
 :url-riscv: http://riscv.org
 :doctype: book
@@ -46,7 +46,7 @@ endif::[]
 [colophon]
 = Colophon
 
-This document describes the Vector Cryptography Extra extensions to the
+This document describes additional Vector Cryptography extensions to the
 RISC-V Instruction Set Architecture.
 
 This document is _Discussion Document_.
@@ -73,6 +73,7 @@ for more information.
 
 Contributors to this specification (in alphabetical order)
 include: +
+Eric Biggers,
 Ken Dockser,
 Markku-Juhani O. Saarinen,
 Nicolas Brunie,
@@ -95,26 +96,28 @@ include::riscv-crypto-vector-extra-introduction.adoc[]
 [[crypto_vector_extensions]]
 == Extensions Overview
 
-The section introduces all of the extensions in the Vector Cryptography Extra
+The section introduces all of the extensions in the Additional Vector Cryptography 
 Instruction Set Extension Specification.
 
 
-All the Vector Crypto Extra Extensions can be built
+All the Additional Vector Crypto Extensions can be built
 on _any_ embedded (Zve*) or application ("V") base Vector Extension.
 
 // See <<crypto-vector-element-groups>> for more details on vector element groups and the drawbacks of
 // small `VLEN` values.
 
 
-All _cryptography-specific_ instructions defined in this Vector Crypto specification (i.e., those
-in <<Zvkgs>>, but _not_ <<zvbc32e>>) shall
-be executed with data-independent execution latency as defined in the
+As the instructions defined in this specification might be used to implement cryptographic primitives
+ they may be implemented with data-independent execution latencies as
+defined in the
 link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification].
-It is important to note that the Vector Crypto instructions are independent of the
-implementation of the `Zkt` extension and do not require that `Zkt` is implemented.
 
-//This specification includes a <<Zvkt>> extension that, when implemented, requires certain vector instructions
-//(including <<zvbb>>, <<zvkb>>, and <<zvbc>>) to be executed with data-independent execution latency.
+If `Zvkt` is implemented, all the instructions from `Zvbc32e` (`vclmul[h].[vv,vx]`) 
+shall be executed with data-independent execution latency as
+
+Whether `Zvkt` is implemented or not, all instructions from `Zvkgs` (`vgmul.vs`, `vghsh.vs`)
+shall be executed with data-independent execution latency.
+
 
 Detection of individual cryptography extensions uses the
 unified software-based RISC-V discovery method.
@@ -134,7 +137,7 @@ include::./riscv-crypto-vector-extra-zvkgs.adoc[]
 
 // ------------------------------------------------------------
 
-[[crypto_vector_extra_insns, reftext="Vector Cryptography Extra Instructions"]]
+[[crypto_vector_extra_insns, reftext="Additional Vector Cryptography Instructions"]]
 == Instructions
 
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
index d52d3ff5..ee5a09c6 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
@@ -1,9 +1,9 @@
 [appendix]
 [[crypto_vector_instructions]]
-=== Crypto Vector Cryptographic Instructions
+=== Additional Vector Cryptographic Instructions
 
 OP-P (0x77)
-Crypto Vector instructions, including Zvkgs, except Zvbb and Zvbc
+Additional Vector Crypto instructions, including Zvkgs, except Zvbb and Zvbc
 The new/modified encoding are in bold and underlined.
 
 // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
index c01afa59..8d057e6a 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
@@ -1,10 +1,10 @@
 [[crypto_vector_introduction]]
 == Introduction
 
-This document describes the proposed _vector_ _extra_ cryptography
+This document describes the proposed _additional_ _vector_ cryptography
 extensions for RISC-V.
 Those extensions extend the _vector_ cryptography extensions for RISC-V,
-providing extra features not mandatory for a high performace implementation but which
-can help further improve the efficiency of the algorithms that use them.
+providing additional features not mandatory for a high performace implementation but which
+can help further improve the efficiency some algorithms (e.g. CRC, AES-GCM).
 All instructions proposed here are based on the Vector registers.
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
index f54683f4..c8d83965 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
@@ -1,16 +1,20 @@
 [[zvkgs,Zvkgs]]
 === `Zvkgs` - Vector-Scalar GCM/GMAC
 
-`Zvkgs` depends on `Zvkg`, it extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`.
-
 Instructions to enable the efficient implementation of parallel versions of GHASH~H~ which is used in Galois/Counter Mode (GCM) and
 Galois Message Authentication Code (GMAC).
 
-The instructions inherit the same constraints as the ones mandated for `Zvkg` instructions: (element group size, data independent execution timing and `vl`/`vstart` multiple constraints).
+`Zvkgs` depends on `Zvkg`. It extends the existing `vghsh.vv` and `vgmul.vv` instructions with new vector-scalar variants: `vghsh.vs` and `vgmul.vs`.
+
+The instructions inherit the constraints defined in `Zvkg`:
+
+- element group size (EGS) is 4
+- data independent execution timing
+- `vl`/`vstart` must be multiples of EGS=4multiple constraints
 
-All of these instructions work on 128-bit element groups comprised of four 32-bit elements, in element group parlance `EGS=4`, `EGW=128` and the instructions are only defined for `SEW=32`.
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
 
-To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing.
+To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing.
 
 The number of element groups to be processed is `vl`/`EGS`.
 `vl` must be set to the number of `SEW=32` elements to be processed and
@@ -25,8 +29,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`.
 |EGW
 |Mnemonic
 |Instruction
-| 32 | 128 | `vghsh.vs` | <<insns-vghsh-vs>>
-| 32 | 128 | `vgmul.vs` | <<insns-vgmul-vs>>
+| 32 | 128 | vghsh.vs | <<insns-vghsh-vs>>
+| 32 | 128 | vgmul.vs | <<insns-vgmul-vs>>
 
 |===
 

From acaf911873fddcd0e240532ccfb0622c73ff78d9 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
Date: Tue, 6 Feb 2024 20:25:50 -0800
Subject: [PATCH 25/26] Apply suggestions from code review

Signed-off-by: Nicolas Brunie <82109999+nibrunieAtSi5@users.noreply.github.com>
---
 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc   |  2 +-
 .../riscv-crypto-vector-extra-inst-table.adoc          |  2 +-
 doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc  | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 3dfd4ce5..53da28cc 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -113,7 +113,7 @@ defined in the
 link:https://github.com/riscv/riscv-crypto/releases/tag/v1.0.1-scalar[RISC-V Scalar Cryptography Extensions specification].
 
 If `Zvkt` is implemented, all the instructions from `Zvbc32e` (`vclmul[h].[vv,vx]`) 
-shall be executed with data-independent execution latency as
+shall be executed with data-independent execution latency.
 
 Whether `Zvkt` is implemented or not, all instructions from `Zvkgs` (`vgmul.vs`, `vghsh.vs`)
 shall be executed with data-independent execution latency.
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
index ee5a09c6..8bd81a2d 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
@@ -3,7 +3,7 @@
 === Additional Vector Cryptographic Instructions
 
 OP-P (0x77)
-Additional Vector Crypto instructions, including Zvkgs, except Zvbb and Zvbc
+Vector Crypto instructions, including `Zvkgs`, except `Zvbb` and `Zvbc`.
 The new/modified encoding are in bold and underlined.
 
 // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
index c8d83965..99155dc5 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-zvkgs.adoc
@@ -10,11 +10,11 @@ The instructions inherit the constraints defined in `Zvkg`:
 
 - element group size (EGS) is 4
 - data independent execution timing
-- `vl`/`vstart` must be multiples of EGS=4multiple constraints
+- `vl`/`vstart` must be multiples of EGS=4
 
-All of these instructions work on 128-bit element groups comprised of four 32-bit elements.
+All of these instructions work on 128-bit element groups comprised of four 32-bit elements (in element group parlance, `EGS=4` and `EGW=128`); the instructions are only defined for `SEW=32`.
 
-To help avoid side-channel timing attacks, these instructions shall be implemented with data-independent timing.
+To help avoid side-channel timing attacks, these instructions shall always be implemented with data-independent timing.
 
 The number of element groups to be processed is `vl`/`EGS`.
 `vl` must be set to the number of `SEW=32` elements to be processed and
@@ -29,8 +29,8 @@ Likewise, `vstart` must be a multiple of `EGS=4`.
 |EGW
 |Mnemonic
 |Instruction
-| 32 | 128 | vghsh.vs | <<insns-vghsh-vs>>
-| 32 | 128 | vgmul.vs | <<insns-vgmul-vs>>
+| 32 | 128 | `vghsh.vs` | <<insns-vghsh-vs>>
+| 32 | 128 | `vgmul.vs` | <<insns-vgmul-vs>>
 
 |===
 

From 38d0834a6785f4641d613851b87801d011c79ce6 Mon Sep 17 00:00:00 2001
From: Nicolas Brunie <nibrunie@gmail.com>
Date: Thu, 7 Mar 2024 20:56:49 -0800
Subject: [PATCH 26/26] Fixing typos / corrections / introducing Zvbc32e inst
 table

---
 doc/vector-extra/insns/vclmul-32e.adoc               |  2 +-
 doc/vector-extra/insns/vclmulh-32e.adoc              |  6 +++---
 doc/vector-extra/insns/vghsh-vs.adoc                 | 12 ++++++------
 doc/vector-extra/insns/vgmul-vs.adoc                 | 10 +++++-----
 doc/vector-extra/riscv-crypto-spec-vector-extra.adoc |  8 ++++++--
 .../riscv-crypto-vector-extra-inst-table.adoc        |  5 ++---
 .../riscv-crypto-vector-extra-introduction.adoc      |  5 +++--
 7 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/doc/vector-extra/insns/vclmul-32e.adoc b/doc/vector-extra/insns/vclmul-32e.adoc
index 1c24c15d..7a47de1c 100644
--- a/doc/vector-extra/insns/vclmul-32e.adoc
+++ b/doc/vector-extra/insns/vclmul-32e.adoc
@@ -56,7 +56,7 @@ Arguments::
 
 [NOTE]
 ====
-`vclmul` instruction was initially defined in `Zvbc` with only `SEW=64-bit` support, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`.
+The `vclmul` instruction was initially defined in `Zvbc` with only `SEW=64-bit` support; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32` bits.
 ====
 
 Description::
diff --git a/doc/vector-extra/insns/vclmulh-32e.adoc b/doc/vector-extra/insns/vclmulh-32e.adoc
index 3800452d..e8fa6cbe 100644
--- a/doc/vector-extra/insns/vclmulh-32e.adoc
+++ b/doc/vector-extra/insns/vclmulh-32e.adoc
@@ -1,4 +1,4 @@
-[[insns-vclmulh, Vector Carry-less Multiply Return High Half]]
+[[insns-vclmulh-32e, Vector Carry-less Multiply Return High Half]]
 = vclmulh.[vv,vx]
 
 Synopsis::
@@ -56,7 +56,7 @@ Arguments::
 
 [NOTE]
 ====
-`vclmulh` instruction was initially defined in `Zvbc`, this page describes how the specification is extended in `Zvbc32e` to support `SEW=32 bits`.
+The `vclmulh` instruction was initially defined in `Zvbc`; this page describes how the specification is extended in `Zvbc32e` to support `SEW=32` bits.
 ====
 
 Description::
@@ -96,4 +96,4 @@ function clmulh(x, y, width) = {
 --
 
 Included in::
-<<zvbc32e>>
+<<zvbc32e>>, Zvbc
diff --git a/doc/vector-extra/insns/vghsh-vs.adoc b/doc/vector-extra/insns/vghsh-vs.adoc
index e1bf1c7d..fcd9d533 100644
--- a/doc/vector-extra/insns/vghsh-vs.adoc
+++ b/doc/vector-extra/insns/vghsh-vs.adoc
@@ -39,17 +39,17 @@ Arguments::
 |SEW
 |Definition
 
-| Vd  | input  | 128  | 4 | 32 | Partial hash (Y~i~)
-| Vs1 | input  | 128  | 4 | 32 | Cipher text (X~i~)
-| Vs2 | input  | 128  | 4 | 32 | Hash Subkey (H)
-| Vd  | output | 128  | 4 | 32 | Partial-hash (Y~i+1~)
+| `vd`  | input  | 128  | 4 | 32 | Partial hash (Y~i~)
+| `vs1` | input  | 128  | 4 | 32 | Cipher text (X~i~)
+| `vs2` | input  | 128  | 4 | 32 | Hash Subkey (H)
+| `vd`  | output | 128  | 4 | 32 | Partial-hash (Y~i+1~)
 |===
 
 Description::
 A single "iteration" of the GHASH~H~ algorithm is performed.
 
 
-The previous partial hashes are read as 4-element groups from 'vd',
+The previous partial hashes are read as 4-element groups from `vd`,
 the cipher texts are read as 4-element groups from `vs1`
  and the hash subkeys are read from the scalar element group in `vs2`.
 The resulting partial hashes are writen as 4-element groups into `vd`.
@@ -102,7 +102,7 @@ function clause execute (VGHSHVS(vs2, vs1, vd)) = {
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
 
-  // H is component to all element groups
+  // H is common to all element groups
   let helem = 0;
   let H = brev8(get_velem(vs2, EGW=128, helem)); // Hash subkey
 
diff --git a/doc/vector-extra/insns/vgmul-vs.adoc b/doc/vector-extra/insns/vgmul-vs.adoc
index 1192f334..622badd1 100644
--- a/doc/vector-extra/insns/vgmul-vs.adoc
+++ b/doc/vector-extra/insns/vgmul-vs.adoc
@@ -37,15 +37,15 @@ Arguments::
 |SEW
 |Definition
 
-| Vd  | input  | 128  | 4 | 32 | Multiplier
-| Vs2 | input  | 128  | 4 | 32 | Multiplicand
-| Vd  | output | 128  | 4 | 32 | Product
+| `vd`  | input  | 128  | 4 | 32 | Multiplier
+| `vs2` | input  | 128  | 4 | 32 | Multiplicand
+| `vd`  | output | 128  | 4 | 32 | Product
 |===
 
 Description::
 A GHASH~H~ multiply is performed.
 
-The multipliers are read as 4-element groups from 'vd',
+The multipliers are read as 4-element groups from `vd`,
  the multiplicands subkeys are read from the scalar element group in `vs2`.
 The resulting products are written as 4-element groups into `vd`.
 
@@ -98,7 +98,7 @@ function clause execute (VGMUL(vs2, vs1, vd, suffix)) = {
 
   eg_len = (vl/EGS)
   eg_start = (vstart/EGS)
-  // H multiplicand is constant for all loop iterations
+  // H multiplicand is common to all loop iterations
   let helem = 0;
   let H = brev8(get_velem(vs2,EGW=128, helem)); // Multiplicand
 
diff --git a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
index 53da28cc..33fec430 100644
--- a/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
+++ b/doc/vector-extra/riscv-crypto-spec-vector-extra.adoc
@@ -2,8 +2,8 @@
 = RISC-V Cryptography Extensions Volume III: Additional Vector Instructions
 :description: The addtional vector cryptography extensions for the RISC-V ISA.
 :company: RISC-V.org
-:revdate: 6 February 2024
-:revnumber: v0.0.4
+:revdate: March 7th 2024
+:revnumber: v0.0.5
 :revremark:
 :url-riscv: http://riscv.org
 :doctype: book
@@ -75,6 +75,7 @@ Contributors to this specification (in alphabetical order)
 include: +
 Eric Biggers,
 Ken Dockser,
+Liana Koleva,
 Markku-Juhani O. Saarinen,
 Nicolas Brunie,
 Richard Newell
@@ -102,6 +103,7 @@ Instruction Set Extension Specification.
 
 All the Additional Vector Crypto Extensions can be built
 on _any_ embedded (Zve*) or application ("V") base Vector Extension.
+In particular, `Zvbc32e` allows `Zve32*` implementations to support vector carry-less multiplication.
 
 // See <<crypto-vector-element-groups>> for more details on vector element groups and the drawbacks of
 // small `VLEN` values.
@@ -157,6 +159,8 @@ bibliography::../riscv-crypto-spec.bib[ieee]
 
 [[Encodings]]
 == Encodings
+include::./riscv-crypto-vector-extra-inst-table-zvbc32e.adoc[]
+
 include::./riscv-crypto-vector-extra-inst-table.adoc[]
 
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
index 8bd81a2d..b1439419 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-inst-table.adoc
@@ -1,10 +1,10 @@
 [appendix]
-[[crypto_vector_instructions]]
+[[crypto_vector_instructions_Zvkgs]]
 === Additional Vector Cryptographic Instructions
 
 OP-P (0x77)
 Vector Crypto instructions, including `Zvkgs`, except `Zvbb` and `Zvbc`.
-The new/modified encoding are in bold and underlined.
+The new/modified encodings are in bold.
 
 // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
 [cols="4,1,1,1,1,4,1,1,1,4,1,1,1"]
@@ -20,7 +20,6 @@ The new/modified encoding are in bold and underlined.
 // [cols="4,1,1,1,8,4,1,1,8,4,1,1,8"]
 [cols="6,1,1,1,1,6,1,1,6,6,1,1,1"]
 
-// TODO to be updated with vghsh.vs and vgmul.vs encoding
 |===
 5+^| funct6                  4+^| funct6                 4+^| funct6
 
diff --git a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
index 8d057e6a..6a516729 100644
--- a/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
+++ b/doc/vector-extra/riscv-crypto-vector-extra-introduction.adoc
@@ -4,7 +4,8 @@
 This document describes the proposed _additional_ _vector_ cryptography
 extensions for RISC-V.
 Those extensions extend the _vector_ cryptography extensions for RISC-V,
-providing additional features not mandatory for a high performace implementation but which
-can help further improve the efficiency some algorithms (e.g. CRC, AES-GCM).
+providing additional features.
+Those extensions aim either to enable some use cases (e.g. carry-less multiply on 32-bit vector implementations)
+or to enable more efficient implementations of some algorithms (e.g. CRC, AES-GCM).
 All instructions proposed here are based on the Vector registers.