Skip to content

Commit

Permalink
Merge pull request #1490 from riscv/lmul-asm
Browse files Browse the repository at this point in the history
Include LMUL in vector examples
  • Loading branch information
aswaterman authored Jul 2, 2024
2 parents b7a445a + 75e857d commit ef6024b
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/example/sgemm.S
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ c_row_loop: # Loop across rows of C blocks
mv cnp, cp # Initialize C n-loop pointer

c_col_loop: # Loop across one row of C blocks
vsetvli nvl, nt, e32, ta, ma # 32-bit vectors, LMUL=1
vsetvli nvl, nt, e32, m1, ta, ma # 32-bit vectors, LMUL=1

mv akp, ap # reset pointer into A to beginning
mv bkp, bnp # step to next column in B matrix
Expand Down
2 changes: 1 addition & 1 deletion src/example/vvaddint32.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
vvaddint32:
vsetvli t0, a0, e32, ta, ma # Set vector length based on 32-bit vectors
vsetvli t0, a0, e32, m1, ta, ma # Set vector length based on 32-bit vectors
vle32.v v0, (a1) # Get first vector
sub a0, a0, t0 # Decrement number done
slli t0, t0, 2 # Multiply number done by 4 bytes
Expand Down
16 changes: 8 additions & 8 deletions src/v-st-ext.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -1163,13 +1163,13 @@ and `vsetivli`, and in the `rs2` register for `vsetvl`.
mf8 # LMUL=1/8
mf4 # LMUL=1/4
mf2 # LMUL=1/2
m1 # LMUL=1, assumed if m setting absent
m1 # LMUL=1
m2 # LMUL=2
m4 # LMUL=4
m8 # LMUL=8
Examples:
vsetvli t0, a0, e8, ta, ma # SEW= 8, LMUL=1
vsetvli t0, a0, e8, m1, ta, ma # SEW= 8, LMUL=1
vsetvli t0, a0, e8, m2, ta, ma # SEW= 8, LMUL=2
vsetvli t0, a0, e32, mf2, ta, ma # SEW=32, LMUL=1/2
----
Expand Down Expand Up @@ -1854,15 +1854,15 @@ field to be stored to each segment.
----
# Example 1
# Memory structure holds packed RGB pixels (24-bit data structure, 8 bits per channel)
vsetvli a1, t0, e8, ta, ma
vsetvli a1, t0, e8, m1, ta, ma
vlseg3e8.v v8, (a0), vm
# v8 holds the red pixels
# v9 holds the green pixels
# v10 holds the blue pixels
# Example 2
# Memory structure holds complex values, 32b for real and 32b for imaginary
vsetvli a1, t0, e32, ta, ma
vsetvli a1, t0, e32, m1, ta, ma
vlseg2e32.v v8, (a0), vm
# v8 holds real
# v9 holds imaginary
Expand Down Expand Up @@ -1897,13 +1897,13 @@ NOTE: Negative and zero strides are supported.
vssseg<nf>e<eew>.v vs3, (rs1), rs2, vm # Strided segment stores
# Examples
vsetvli a1, t0, e8, ta, ma
vsetvli a1, t0, e8, m1, ta, ma
vlsseg3e8.v v4, (x5), x6 # Load bytes at addresses x5+i*x6 into v4[i],
# and bytes at addresses x5+i*x6+1 into v5[i],
# and bytes at addresses x5+i*x6+2 into v6[i].
# Example: strided segment store
vsetvli a1, t0, e32, ta, ma
vsetvli a1, t0, e32, m1, ta, ma
vssseg2e32.v v2, (x5), x6 # Store words from v2[i] to address x5+i*x6
# and words from v3[i] to address x5+i*x6+4
----
Expand Down Expand Up @@ -1935,13 +1935,13 @@ The EMUL * NFIELDS {le} 8 constraint applies to the data vector register group.
vsoxseg<nf>ei<eew>.v vs3, (rs1), vs2, vm # Indexed-ordered segment stores
# Examples
vsetvli a1, t0, e8, ta, ma
vsetvli a1, t0, e8, m1, ta, ma
vluxseg3ei8.v v4, (x5), v3 # Load bytes at addresses x5+v3[i] into v4[i],
# and bytes at addresses x5+v3[i]+1 into v5[i],
# and bytes at addresses x5+v3[i]+2 into v6[i].
# Example: indexed-unordered segment store
vsetvli a1, t0, e32, ta, ma
vsetvli a1, t0, e32, m1, ta, ma
vsuxseg2ei32.v v2, (x5), v5 # Store words from v2[i] to address x5+v5[i]
# and words from v3[i] to address x5+v5[i]+4
----
Expand Down

0 comments on commit ef6024b

Please sign in to comment.