Skip to content

Commit 6fb7bdc

Browse files
committed
cmd/compile: intrinsify math/bits.TrailingZeros on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.TrailingZeros
using the CTZ/CTZW machine instructions.

On a StarFive VisionFive 2 with GORISCV64=rva22u64:

                   │   ctz.b.1    │               ctz.b.2               │
                   │    sec/op    │    sec/op     vs base               │
TrailingZeros-4      25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
TrailingZeros8-4      14.76n ± 0%    10.74n ± 0%  -27.24% (p=0.000 n=10)
TrailingZeros16-4     26.84n ± 0%    10.74n ± 0%  -59.99% (p=0.000 n=10)
TrailingZeros32-4    25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
TrailingZeros64-4    25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
geomean               23.09n         9.035n       -60.88%

Change-Id: I71edf2b988acb7a68e797afda4ee66d7a57d587e
Reviewed-on: https://go-review.googlesource.com/c/go/+/652320
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
1 parent e6ffe76 commit 6fb7bdc

File tree

8 files changed

+141
-11
lines changed

8 files changed

+141
-11
lines changed

Diff for: src/cmd/compile/internal/riscv64/ssa.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
419419
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
420420
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
421421
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
422-
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW:
422+
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW:
423423
p := s.Prog(v.Op.Asm())
424424
p.From.Type = obj.TYPE_REG
425425
p.From.Reg = v.Args[0].Reg()

Diff for: src/cmd/compile/internal/ssa/_gen/RISCV64.rules

+7
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,13 @@
218218
(RotateLeft32 ...) => (ROLW ...)
219219
(RotateLeft64 ...) => (ROL ...)
220220

221+
// Count trailing zeros (note that these will only be emitted for rva22u64 and above).
222+
(Ctz(64|32|16|8)NonZero ...) => (Ctz64 ...)
223+
(Ctz64 ...) => (CTZ ...)
224+
(Ctz32 ...) => (CTZW ...)
225+
(Ctz16 x) => (CTZW (ORI <typ.UInt32> [1<<16] x))
226+
(Ctz8 x) => (CTZW (ORI <typ.UInt32> [1<<8] x))
227+
221228
(Less64 ...) => (SLT ...)
222229
(Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y))
223230
(Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y))

Diff for: src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go

+2
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ func init() {
229229
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
230230
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // ^arg0 & arg1
231231
{name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint
232+
{name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros
233+
{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word
232234
{name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0
233235
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1
234236
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // ^arg0 | arg1

Diff for: src/cmd/compile/internal/ssa/opGen.go

+28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: src/cmd/compile/internal/ssa/rewriteRISCV64.go

+54
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: src/cmd/compile/internal/ssagen/intrinsics.go

+24
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
900900
return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
901901
},
902902
sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm)
903+
904+
if cfg.goriscv64 >= 22 {
905+
addF("math/bits", "TrailingZeros64",
906+
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
907+
return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
908+
},
909+
sys.RISCV64)
910+
addF("math/bits", "TrailingZeros32",
911+
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
912+
return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
913+
},
914+
sys.RISCV64)
915+
addF("math/bits", "TrailingZeros16",
916+
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
917+
return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
918+
},
919+
sys.RISCV64)
920+
addF("math/bits", "TrailingZeros8",
921+
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
922+
return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
923+
},
924+
sys.RISCV64)
925+
}
926+
903927
alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
904928
alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)
905929
addF("math/bits", "ReverseBytes16",

Diff for: src/cmd/compile/internal/ssagen/intrinsics_test.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
11061106
{"riscv64", "internal/runtime/sys", "GetCallerPC"}: struct{}{},
11071107
{"riscv64", "internal/runtime/sys", "GetCallerSP"}: struct{}{},
11081108
{"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
1109+
{"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{},
1110+
{"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{},
1111+
{"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{},
11091112
{"riscv64", "math", "Abs"}: struct{}{},
11101113
{"riscv64", "math", "Copysign"}: struct{}{},
11111114
{"riscv64", "math", "FMA"}: struct{}{},
@@ -1122,6 +1125,10 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
11221125
{"riscv64", "math/bits", "RotateLeft8"}: struct{}{},
11231126
{"riscv64", "math/bits", "Sub"}: struct{}{},
11241127
{"riscv64", "math/bits", "Sub64"}: struct{}{},
1128+
{"riscv64", "math/bits", "TrailingZeros16"}: struct{}{},
1129+
{"riscv64", "math/bits", "TrailingZeros32"}: struct{}{},
1130+
{"riscv64", "math/bits", "TrailingZeros64"}: struct{}{},
1131+
{"riscv64", "math/bits", "TrailingZeros8"}: struct{}{},
11251132
{"riscv64", "runtime", "KeepAlive"}: struct{}{},
11261133
{"riscv64", "runtime", "publicationBarrier"}: struct{}{},
11271134
{"riscv64", "runtime", "slicebytetostringtmp"}: struct{}{},
@@ -1308,7 +1315,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
13081315

13091316
func TestIntrinsics(t *testing.T) {
13101317
cfg := &intrinsicBuildConfig{
1311-
goppc64: 10,
1318+
goppc64: 10,
1319+
goriscv64: 23,
13121320
}
13131321
initIntrinsics(cfg)
13141322

Diff for: test/codegen/mathbits.go

+16-9
Original file line numberDiff line numberDiff line change
@@ -356,28 +356,30 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
356356
// ------------------------ //
357357

358358
func TrailingZeros(n uint) int {
359+
// 386:"BSFL"
359360
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
360361
// amd64/v3:"TZCNTQ"
361-
// 386:"BSFL"
362362
// arm:"CLZ"
363363
// arm64:"RBIT","CLZ"
364364
// loong64:"CTZV"
365-
// s390x:"FLOGR"
366365
// ppc64x/power8:"ANDN","POPCNTD"
367366
// ppc64x/power9: "CNTTZD"
367+
// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
368+
// s390x:"FLOGR"
368369
// wasm:"I64Ctz"
369370
return bits.TrailingZeros(n)
370371
}
371372

372373
func TrailingZeros64(n uint64) int {
374+
// 386:"BSFL","JNE"
373375
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
374376
// amd64/v3:"TZCNTQ"
375-
// 386:"BSFL","JNE"
376377
// arm64:"RBIT","CLZ"
377378
// loong64:"CTZV"
378-
// s390x:"FLOGR"
379379
// ppc64x/power8:"ANDN","POPCNTD"
380380
// ppc64x/power9: "CNTTZD"
381+
// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
382+
// s390x:"FLOGR"
381383
// wasm:"I64Ctz"
382384
return bits.TrailingZeros64(n)
383385
}
@@ -389,40 +391,43 @@ func TrailingZeros64Subtract(n uint64) int {
389391
}
390392

391393
func TrailingZeros32(n uint32) int {
394+
// 386:"BSFL"
392395
// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
393396
// amd64/v3:"TZCNTL"
394-
// 386:"BSFL"
395397
// arm:"CLZ"
396398
// arm64:"RBITW","CLZW"
397399
// loong64:"CTZW"
398-
// s390x:"FLOGR","MOVWZ"
399400
// ppc64x/power8:"ANDN","POPCNTW"
400401
// ppc64x/power9: "CNTTZW"
402+
// riscv64/rva22u64,riscv64/rva23u64: "CTZW"
403+
// s390x:"FLOGR","MOVWZ"
401404
// wasm:"I64Ctz"
402405
return bits.TrailingZeros32(n)
403406
}
404407

405408
func TrailingZeros16(n uint16) int {
406-
// amd64:"BSFL","ORL\\t\\$65536"
407409
// 386:"BSFL\t"
410+
// amd64:"BSFL","ORL\\t\\$65536"
408411
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
409412
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
410413
// loong64:"CTZV"
411-
// s390x:"FLOGR","OR\t\\$65536"
412414
// ppc64x/power8:"POPCNTW","ADD\t\\$-1"
413415
// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
416+
// riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$65536","CTZW"
417+
// s390x:"FLOGR","OR\t\\$65536"
414418
// wasm:"I64Ctz"
415419
return bits.TrailingZeros16(n)
416420
}
417421

418422
func TrailingZeros8(n uint8) int {
419-
// amd64:"BSFL","ORL\\t\\$256"
420423
// 386:"BSFL"
424+
// amd64:"BSFL","ORL\\t\\$256"
421425
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
422426
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
423427
// loong64:"CTZV"
424428
// ppc64x/power8:"POPCNTB","ADD\t\\$-1"
425429
// ppc64x/power9:"CNTTZD","OR\t\\$256"
430+
// riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$256","CTZW"
426431
// s390x:"FLOGR","OR\t\\$256"
427432
// wasm:"I64Ctz"
428433
return bits.TrailingZeros8(n)
@@ -469,6 +474,7 @@ func IterateBits16(n uint16) int {
469474
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
470475
// amd64/v3:"TZCNTL"
471476
// arm64:"RBITW","CLZW",-"ORR"
477+
// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
472478
i += bits.TrailingZeros16(n)
473479
n &= n - 1
474480
}
@@ -481,6 +487,7 @@ func IterateBits8(n uint8) int {
481487
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
482488
// amd64/v3:"TZCNTL"
483489
// arm64:"RBITW","CLZW",-"ORR"
490+
// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
484491
i += bits.TrailingZeros8(n)
485492
n &= n - 1
486493
}

0 commit comments

Comments
 (0)