From 611706b17136beb602711f7bfebd15622f73f58f Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 1 Aug 2023 14:32:56 -0700 Subject: [PATCH] cmd/compile: don't use BTS when OR works, add direct memory BTS operations Stop using BTSconst and friends when ORLconst can be used instead. OR can be issued by more function units than BTS can, so it could lead to better IPC. OR might take a few more bytes to encode, but not a lot more. Still use BTSconst for cases where the constant otherwise wouldn't fit and would require a separate movabs instruction to materialize the constant. This happens when setting bits 31-63 of 64-bit targets. Add BTS-to-memory operations so we don't need to load/bts/store. Fixes #61694 Change-Id: I00379608df8fb0167cb01466e97d11dec7c1596c Reviewed-on: https://go-review.googlesource.com/c/go/+/515755 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui --- src/cmd/compile/internal/amd64/ssa.go | 9 +- src/cmd/compile/internal/ssa/_gen/AMD64.rules | 71 +- src/cmd/compile/internal/ssa/_gen/AMD64Ops.go | 27 +- src/cmd/compile/internal/ssa/opGen.go | 72 +- src/cmd/compile/internal/ssa/rewriteAMD64.go | 658 +++--------------- test/codegen/bits.go | 12 +- test/codegen/mathbits.go | 4 +- test/codegen/memops.go | 29 + 8 files changed, 211 insertions(+), 671 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index d32ea7ec16103..ab762c24f6703 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -714,9 +714,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Offset = v.AuxInt case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst, ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst, - ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst, - ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst, - ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst: + ssa.OpAMD64BTSQconst, + ssa.OpAMD64BTCQconst, + 
ssa.OpAMD64BTRQconst: op := v.Op if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 { // Emit 32-bit version because it's shorter @@ -851,7 +851,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { } fallthrough case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, - ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify: + ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify, + ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify: sc := v.AuxValAndOff() off := sc.Off64() val := sc.Val64() diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index b6937de800550..c4f74bb0d90ed 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -82,8 +82,8 @@ (Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) (Ctz64 x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x))) (Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst [32] x))) -(Ctz16 x) => (BSFL (BTSLconst [16] x)) -(Ctz8 x) => (BSFL (BTSLconst [ 8] x)) +(Ctz16 x) => (BSFL (ORLconst [1<<16] x)) +(Ctz8 x) => (BSFL (ORLconst [1<<8 ] x)) (Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x) (Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x) @@ -659,29 +659,16 @@ // Recognize bit setting (a |= 1< (BTS(Q|L) x y) (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y) - -// Convert ORconst into BTS, if the code gets smaller, with boundary being -// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes). 
-((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128 - => (BT(S|C)Qconst [int8(log32(c))] x) -((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 - => (BT(S|C)Lconst [int8(log32(c))] x) -((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 - => (BT(S|C)Qconst [int8(log64(c))] x) -((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 - => (BT(S|C)Lconst [int8(log32(c))] x) +// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in +// the constant field of the OR/XOR instruction. See issue 61694. +((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x) // Recognize bit clearing: a &^= 1<<b (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y) (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y) -(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - => (BTRQconst [int8(log32(^c))] x) -(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - => (BTRLconst [int8(log32(^c))] x) -(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 - => (BTRQconst [int8(log64(^c))] x) -(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - => (BTRLconst [int8(log32(^c))] x) +// Note: only convert AND to BTR if the constant wouldn't fit in +// the constant field of the AND instruction. See issue 61694. +(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x) // Special-case bit patterns on first/last bit. 
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts, @@ -695,9 +682,9 @@ // Special case resetting first/last bit (SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) - => (BTR(L|Q)const [0] x) + => (AND(L|Q)const [-2] x) (SHRLconst [1] (SHLLconst [1] x)) - => (BTRLconst [31] x) + => (ANDLconst [0x7fffffff] x) (SHRQconst [1] (SHLQconst [1] x)) => (BTRQconst [63] x) @@ -731,10 +718,10 @@ => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem) // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1) -(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x) -(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x) -(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x) -(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x) +(BTSQconst [c] (BTRQconst [c] x)) => (BTSQconst [c] x) +(BTSQconst [c] (BTCQconst [c] x)) => (BTSQconst [c] x) +(BTRQconst [c] (BTSQconst [c] x)) => (BTRQconst [c] x) +(BTRQconst [c] (BTCQconst [c] x)) => (BTRQconst [c] x) // Fold boolean negation into SETcc. 
(XORLconst [1] (SETNE x)) => (SETEQ x) @@ -778,31 +765,6 @@ (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x) (OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x) -(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1< (ANDLconst [c &^ (1< (ANDLconst [^(1< (XORLconst [d ^ 1< (XORLconst [c ^ 1< (XORLconst [1< (ORLconst [d | 1< (ORLconst [c | 1< (ORLconst [1< (ANDQconst [d &^ (1< (ANDQconst [c &^ (1< (ANDQconst [^(1< (XORQconst [d ^ 1< (XORQconst [c ^ 1< (XORQconst [1< (ORQconst [d | 1< (ORQconst [c | 1< (ORQconst [1< (MULLconst [c * d] x) (MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x) @@ -1422,11 +1384,8 @@ (NOTQ (MOVQconst [c])) => (MOVQconst [^c]) (NOTL (MOVLconst [c])) => (MOVLconst [^c]) (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1< (MOVLconst [d|(1< (MOVQconst [d&^(1< (MOVLconst [d&^(1< (MOVQconst [d^(1< (MOVLconst [d^(1< ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) +(MOVQstore {sym} [off] ptr x:(BT(S|R|C)Qconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) && x.Uses == 1 && l.Uses == 1 && clobber(x, l) => + (BT(S|R|C)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) // Merge ADDQconst and LEAQ into atomic loads. 
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index e9205d56c6181..606171947bbd7 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -399,12 +399,27 @@ func init() { {name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg1%64 in arg0 {name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32 {name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64 - {name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32 - {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64 - {name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32 - {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64 - {name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32 - {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64 + {name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 31 <= auxint < 64 + {name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", 
resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 31 <= auxint < 64 + {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 31 <= auxint < 64 + + // BT[SRC]Qconstmodify + // + // S: set bit + // R: reset (clear) bit + // C: complement bit + // + // Apply operation to bit ValAndOff(AuxInt).Val() in the 64 bits at + // memory address arg0+ValAndOff(AuxInt).Off()+aux + // Bit index must be in range (31-63). + // (We use OR/AND/XOR for thinner targets and lower bit indexes.) + // arg1=mem, returns mem + // + // Note that there aren't non-const versions of these instructions. + // Well, there are such instructions, but they are slow and weird so we don't use them. + {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, + {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, + {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // TESTx: compare (arg0 & arg1) to 0 {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 64aea38afe8b3..84dcd9a3cc8ab 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -716,12 +716,12 @@ const ( OpAMD64BTSQ OpAMD64BTLconst OpAMD64BTQconst - OpAMD64BTCLconst OpAMD64BTCQconst - OpAMD64BTRLconst OpAMD64BTRQconst - OpAMD64BTSLconst OpAMD64BTSQconst + OpAMD64BTSQconstmodify + OpAMD64BTRQconstmodify + OpAMD64BTCQconstmodify OpAMD64TESTQ OpAMD64TESTL OpAMD64TESTW @@ -8779,12 +8779,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: 
"BTCLconst", + name: "BTCQconst", auxType: auxInt8, argLen: 1, resultInArg0: true, clobberFlags: true, - asm: x86.ABTCL, + asm: x86.ABTCQ, reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -8795,12 +8795,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "BTCQconst", + name: "BTRQconst", auxType: auxInt8, argLen: 1, resultInArg0: true, clobberFlags: true, - asm: x86.ABTCQ, + asm: x86.ABTRQ, reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -8811,12 +8811,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "BTRLconst", + name: "BTSQconst", auxType: auxInt8, argLen: 1, resultInArg0: true, clobberFlags: true, - asm: x86.ABTRL, + asm: x86.ABTSQ, reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -8827,50 +8827,44 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "BTRQconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTRQ, + name: "BTSQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSQ, reg: regInfo{ inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB }, }, }, { - name: "BTSLconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTSL, + name: "BTRQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRQ, reg: regInfo{ inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 
R9 R10 R11 R12 R13 g R15 SB }, }, }, { - name: "BTSQconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - clobberFlags: true, - asm: x86.ABTSQ, + name: "BTCQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCQ, reg: regInfo{ inputs: []inputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB }, }, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index afe9ed257a561..979d9be3a7264 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -73,20 +73,14 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64BSWAPL(v) case OpAMD64BSWAPQ: return rewriteValueAMD64_OpAMD64BSWAPQ(v) - case OpAMD64BTCLconst: - return rewriteValueAMD64_OpAMD64BTCLconst(v) case OpAMD64BTCQconst: return rewriteValueAMD64_OpAMD64BTCQconst(v) case OpAMD64BTLconst: return rewriteValueAMD64_OpAMD64BTLconst(v) case OpAMD64BTQconst: return rewriteValueAMD64_OpAMD64BTQconst(v) - case OpAMD64BTRLconst: - return rewriteValueAMD64_OpAMD64BTRLconst(v) case OpAMD64BTRQconst: return rewriteValueAMD64_OpAMD64BTRQconst(v) - case OpAMD64BTSLconst: - return rewriteValueAMD64_OpAMD64BTSLconst(v) case OpAMD64BTSQconst: return rewriteValueAMD64_OpAMD64BTSQconst(v) case OpAMD64CMOVLCC: @@ -2626,26 +2620,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool { } break } - // match: (ANDL (MOVLconst [c]) x) - // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - // result: (BTRLconst [int8(log32(^c))] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - if 
!(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) { - continue - } - v.reset(OpAMD64BTRLconst) - v.AuxInt = int8ToAuxInt(int8(log32(^c))) - v.AddArg(x) - return true - } - break - } // match: (ANDL x (MOVLconst [c])) // result: (ANDLconst [c] x) for { @@ -2754,20 +2728,6 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool { } func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool { v_0 := v.Args[0] - // match: (ANDLconst [c] x) - // cond: isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - // result: (BTRLconst [int8(log32(^c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128) { - break - } - v.reset(OpAMD64BTRLconst) - v.AuxInt = int8ToAuxInt(int8(log32(^c))) - v.AddArg(x) - return true - } // match: (ANDLconst [c] (ANDLconst [d] x)) // result: (ANDLconst [c & d] x) for { @@ -2782,20 +2742,6 @@ func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDLconst [c] (BTRLconst [d] x)) - // result: (ANDLconst [c &^ (1<= 128 + // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 // result: (BTRQconst [int8(log64(^c))] x) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -3108,7 +3054,7 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool { } c := auxIntToInt64(v_0.AuxInt) x := v_1 - if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128) { + if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) { continue } v.reset(OpAMD64BTRQconst) @@ -3230,20 +3176,6 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool { } func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool { v_0 := v.Args[0] - // match: (ANDQconst [c] x) - // cond: isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128 - // result: (BTRQconst [int8(log32(^c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128) { - break - } - v.reset(OpAMD64BTRQconst) - v.AuxInt = int8ToAuxInt(int8(log32(^c))) - v.AddArg(x) - return true - } // match: 
(ANDQconst [c] (ANDQconst [d] x)) // result: (ANDQconst [c & d] x) for { @@ -3258,24 +3190,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDQconst [c] (BTRQconst [d] x)) - // cond: is32Bit(int64(c) &^ (1<= 128 - // result: (BTSLconst [int8(log32(c))] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) { - continue - } - v.reset(OpAMD64BTSLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } - break - } // match: (ORL x (MOVLconst [c])) // result: (ORLconst [c] x) for { @@ -14718,20 +14398,6 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool { v_0 := v.Args[0] - // match: (ORLconst [c] x) - // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 - // result: (BTSLconst [int8(log32(c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) { - break - } - v.reset(OpAMD64BTSLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } // match: (ORLconst [c] (ORLconst [d] x)) // result: (ORLconst [c | d] x) for { @@ -14746,20 +14412,6 @@ func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool { v.AddArg(x) return true } - // match: (ORLconst [c] (BTSLconst [d] x)) - // result: (ORLconst [c | 1<= 128 + // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 // result: (BTSQconst [int8(log64(c))] x) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -15002,7 +14654,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } c := auxIntToInt64(v_0.AuxInt) x := v_1 - if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) { + if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) { continue } v.reset(OpAMD64BTSQconst) @@ -15201,20 +14853,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } 
func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool { v_0 := v.Args[0] - // match: (ORQconst [c] x) - // cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128 - // result: (BTSQconst [int8(log32(c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) { - break - } - v.reset(OpAMD64BTSQconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } // match: (ORQconst [c] (ORQconst [d] x)) // result: (ORQconst [c | d] x) for { @@ -15229,24 +14867,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool { v.AddArg(x) return true } - // match: (ORQconst [c] (BTSQconst [d] x)) - // cond: is32Bit(int64(c) | 1<= 128 - // result: (BTCLconst [int8(log32(c))] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) { - continue - } - v.reset(OpAMD64BTCLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } - break - } // match: (XORL x (MOVLconst [c])) // result: (XORLconst [c] x) for { @@ -23541,20 +23141,6 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool { } func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { v_0 := v.Args[0] - // match: (XORLconst [c] x) - // cond: isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128 - // result: (BTCLconst [int8(log32(c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128) { - break - } - v.reset(OpAMD64BTCLconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } // match: (XORLconst [1] (SETNE x)) // result: (SETEQ x) for { @@ -23679,20 +23265,6 @@ func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { v.AddArg(x) return true } - // match: (XORLconst [c] (BTCLconst [d] x)) - // result: (XORLconst [c ^ 1<= 128 + // cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 // result: 
(BTCQconst [int8(log64(c))] x) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -23923,7 +23495,7 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { } c := auxIntToInt64(v_0.AuxInt) x := v_1 - if !(isUint64PowerOfTwo(c) && uint64(c) >= 128) { + if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) { continue } v.reset(OpAMD64BTCQconst) @@ -24008,20 +23580,6 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { } func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool { v_0 := v.Args[0] - // match: (XORQconst [c] x) - // cond: isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128 - // result: (BTCQconst [int8(log32(c))] x) - for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128) { - break - } - v.reset(OpAMD64BTCQconst) - v.AuxInt = int8ToAuxInt(int8(log32(c))) - v.AddArg(x) - return true - } // match: (XORQconst [c] (XORQconst [d] x)) // result: (XORQconst [c ^ d] x) for { @@ -24036,24 +23594,6 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool { v.AddArg(x) return true } - // match: (XORQconst [c] (BTCQconst [d] x)) - // cond: is32Bit(int64(c) ^ 1< [16] x)) + // result: (BSFL (ORLconst [1<<16] x)) for { x := v_0 v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32) - v0.AuxInt = int8ToAuxInt(16) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 16) v0.AddArg(x) v.AddArg(v0) return true @@ -25848,12 +25388,12 @@ func rewriteValueAMD64_OpCtz8(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Ctz8 x) - // result: (BSFL (BTSLconst [ 8] x)) + // result: (BSFL (ORLconst [1<<8 ] x)) for { x := v_0 v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64BTSLconst, typ.UInt32) - v0.AuxInt = int8ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 8) v0.AddArg(x) v.AddArg(v0) return true diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 018f5b909e6d6..88d5ebe9cf094 
100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -220,10 +220,10 @@ func biton32(a, b uint32) (n uint32) { // amd64:"BTSL" n += b | (1 << (a & 31)) - // amd64:"BTSL\t[$]31" + // amd64:"ORL\t[$]-2147483648" n += a | (1 << 31) - // amd64:"BTSL\t[$]28" + // amd64:"ORL\t[$]268435456" n += a | (1 << 28) // amd64:"ORL\t[$]1" @@ -236,10 +236,10 @@ func bitoff32(a, b uint32) (n uint32) { // amd64:"BTRL" n += b &^ (1 << (a & 31)) - // amd64:"BTRL\t[$]31" + // amd64:"ANDL\t[$]2147483647" n += a &^ (1 << 31) - // amd64:"BTRL\t[$]28" + // amd64:"ANDL\t[$]-268435457" n += a &^ (1 << 28) // amd64:"ANDL\t[$]-2" @@ -252,10 +252,10 @@ func bitcompl32(a, b uint32) (n uint32) { // amd64:"BTCL" n += b ^ (1 << (a & 31)) - // amd64:"BTCL\t[$]31" + // amd64:"XORL\t[$]-2147483648" n += a ^ (1 << 31) - // amd64:"BTCL\t[$]28" + // amd64:"XORL\t[$]268435456" n += a ^ (1 << 28) // amd64:"XORL\t[$]1" diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 797aa23b67873..d80bfaeec07d9 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -335,7 +335,7 @@ func TrailingZeros32(n uint32) int { } func TrailingZeros16(n uint16) int { - // amd64:"BSFL","BTSL\\t\\$16" + // amd64:"BSFL","ORL\\t\\$65536" // 386:"BSFL\t" // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" @@ -347,7 +347,7 @@ func TrailingZeros16(n uint16) int { } func TrailingZeros8(n uint8) int { - // amd64:"BSFL","BTSL\\t\\$8" + // amd64:"BSFL","ORL\\t\\$256" // 386:"BSFL" // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" diff --git a/test/codegen/memops.go b/test/codegen/memops.go index f6cf9450a1eb4..e5e89c2acc9f4 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -372,3 +372,32 @@ func storeTest(a []bool, v int, i int) { // amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` a[3+i] = v&2 != 0 } + +func bitOps(p *[12]uint64) { + // amd64: 
`ORQ\t\$8, \(AX\)` + p[0] |= 8 + // amd64: `ORQ\t\$1073741824, 8\(AX\)` + p[1] |= 1 << 30 + // amd64: `BTSQ\t\$31, 16\(AX\)` + p[2] |= 1 << 31 + // amd64: `BTSQ\t\$63, 24\(AX\)` + p[3] |= 1 << 63 + + // amd64: `ANDQ\t\$-9, 32\(AX\)` + p[4] &^= 8 + // amd64: `ANDQ\t\$-1073741825, 40\(AX\)` + p[5] &^= 1 << 30 + // amd64: `BTRQ\t\$31, 48\(AX\)` + p[6] &^= 1 << 31 + // amd64: `BTRQ\t\$63, 56\(AX\)` + p[7] &^= 1 << 63 + + // amd64: `XORQ\t\$8, 64\(AX\)` + p[8] ^= 8 + // amd64: `XORQ\t\$1073741824, 72\(AX\)` + p[9] ^= 1 << 30 + // amd64: `BTCQ\t\$31, 80\(AX\)` + p[10] ^= 1 << 31 + // amd64: `BTCQ\t\$63, 88\(AX\)` + p[11] ^= 1 << 63 +}