Merge pull request #1490 from riscv/lmul-asm

Include LMUL in vector examples
riscv · Jul 2, 2024 · ef6024b
2 parents b7a445a + 75e857d
commit ef6024b
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 10 deletions.
diff --git a/src/example/sgemm.S b/src/example/sgemm.S
@@ -73,7 +73,7 @@ c_row_loop: # Loop across rows of C blocks
     mv cnp, cp # Initialize C n-loop pointer
 
 c_col_loop: # Loop across one row of C blocks
-    vsetvli nvl, nt, e32, ta, ma  # 32-bit vectors, LMUL=1
+    vsetvli nvl, nt, e32, m1, ta, ma  # 32-bit vectors, LMUL=1
 
     mv akp, ap   # reset pointer into A to beginning
     mv bkp, bnp # step to next column in B matrix

diff --git a/src/example/vvaddint32.s b/src/example/vvaddint32.s
@@ -8,7 +8,7 @@
     # a0 = n, a1 = x, a2 = y, a3 = z
     # Non-vector instructions are indented
 vvaddint32:
-    vsetvli t0, a0, e32, ta, ma  # Set vector length based on 32-bit vectors
+    vsetvli t0, a0, e32, m1, ta, ma  # Set vector length based on 32-bit vectors
     vle32.v v0, (a1)         # Get first vector
       sub a0, a0, t0         # Decrement number done
       slli t0, t0, 2         # Multiply number done by 4 bytes

diff --git a/src/v-st-ext.adoc b/src/v-st-ext.adoc
@@ -1163,13 +1163,13 @@ and `vsetivli`, and in the `rs2` register for `vsetvl`.
  mf8  # LMUL=1/8
  mf4  # LMUL=1/4
  mf2  # LMUL=1/2
- m1   # LMUL=1, assumed if m setting absent
+ m1   # LMUL=1
  m2   # LMUL=2
  m4   # LMUL=4
  m8   # LMUL=8
 
 Examples:
-    vsetvli t0, a0, e8, ta, ma          # SEW= 8, LMUL=1
+    vsetvli t0, a0, e8, m1, ta, ma      # SEW= 8, LMUL=1
     vsetvli t0, a0, e8, m2, ta, ma      # SEW= 8, LMUL=2
     vsetvli t0, a0, e32, mf2, ta, ma    # SEW=32, LMUL=1/2
 ----
@@ -1854,15 +1854,15 @@ field to be stored to each segment.
 ----
     # Example 1
     # Memory structure holds packed RGB pixels (24-bit data structure, 8bpp)
-    vsetvli a1, t0, e8, ta, ma
+    vsetvli a1, t0, e8, m1, ta, ma
     vlseg3e8.v v8, (a0), vm
     # v8 holds the red pixels
     # v9 holds the green pixels
     # v10 holds the blue pixels
 
     # Example 2
     # Memory structure holds complex values, 32b for real and 32b for imaginary
-    vsetvli a1, t0, e32, ta, ma
+    vsetvli a1, t0, e32, m1, ta, ma
     vlseg2e32.v v8, (a0), vm
     # v8 holds real
     # v9 holds imaginary
@@ -1897,13 +1897,13 @@ NOTE: Negative and zero strides are supported.
     vssseg<nf>e<eew>.v vs3, (rs1), rs2, vm         # Strided segment stores
 
     # Examples
-    vsetvli a1, t0, e8, ta, ma
+    vsetvli a1, t0, e8, m1, ta, ma
     vlsseg3e8.v v4, (x5), x6   # Load bytes at addresses x5+i*x6   into v4[i],
                               #  and bytes at addresses x5+i*x6+1 into v5[i],
                               #  and bytes at addresses x5+i*x6+2 into v6[i].
 
     # Examples
-    vsetvli a1, t0, e32, ta, ma
+    vsetvli a1, t0, e32, m1, ta, ma
     vssseg2e32.v v2, (x5), x6   # Store words from v2[i] to address x5+i*x6
                                 #   and words from v3[i] to address x5+i*x6+4
 ----
@@ -1935,13 +1935,13 @@ The EMUL * NFIELDS {le} 8 constraint applies to the data vector register group.
     vsoxseg<nf>ei<eew>.v vs3, (rs1), vs2, vm  # Indexed-ordered segment stores
 
     # Examples
-    vsetvli a1, t0, e8, ta, ma
+    vsetvli a1, t0, e8, m1, ta, ma
     vluxseg3ei8.v v4, (x5), v3   # Load bytes at addresses x5+v3[i]   into v4[i],
                                  #  and bytes at addresses x5+v3[i]+1 into v5[i],
                                  #  and bytes at addresses x5+v3[i]+2 into v6[i].
 
     # Examples
-    vsetvli a1, t0, e32, ta, ma
+    vsetvli a1, t0, e32, m1, ta, ma
     vsuxseg2ei32.v v2, (x5), v5   # Store words from v2[i] to address x5+v5[i]
                                   #   and words from v3[i] to address x5+v5[i]+4
 ----