Skip to content

Commit

Permalink
cmd/compile/internal/ssa: on PPC64, merge (CMPconst [0] (op ...)) mor…
Browse files Browse the repository at this point in the history
…e aggressively

Generate the CC version of many opcodes whose result is compared against
signed 0. The approach taken here works even if the opcode result is used in
multiple places too.

Add support for ADD, ADDconst, ANDN, SUB, NEG, CNTLZD, NOR conversions
to their CC opcode variant. These are the most commonly used variants.

Also, do not set clobberFlags of CNTLZD and CNTLZW, they do not clobber
flags.

This results in about 1% smaller text sections in kubernetes binaries,
and no regressions in the crypto benchmarks.

Change-Id: I9e0381944869c3774106bf348dead5ecb96dffda
Reviewed-on: https://go-review.googlesource.com/c/go/+/538636
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jayanth Krishnamurthy <jayanth.krishnamurthy@ibm.com>
Reviewed-by: Heschi Kreinick <heschi@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
  • Loading branch information
pmur committed Nov 13, 2023
1 parent 30de0b5 commit 773039e
Show file tree
Hide file tree
Showing 7 changed files with 663 additions and 74 deletions.
13 changes: 10 additions & 3 deletions src/cmd/compile/internal/ppc64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r

case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
ssa.OpPPC64ANDNCC:
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
Expand All @@ -603,6 +604,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()

case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC:
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()

case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
Expand Down Expand Up @@ -734,13 +742,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

case ssa.OpPPC64ANDCCconst:
case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst:
p := s.Prog(v.Op.Asm())
p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
// p.To.Reg = ppc64.REGTMP // discard result
p.To.Reg = v.Reg0()

case ssa.OpPPC64MOVDaddr:
Expand Down
83 changes: 45 additions & 38 deletions src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,17 @@ func init() {
r6 = buildReg("R6")
)
ops := []opData{
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"}, // arg0 + auxInt
{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1
{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (carry is ignored)
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"}, // arg0 + arg1
{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"}, // arg0 + auxInt
{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1
{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"}, // arg0-arg1 sets CC
{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (carry is ignored)
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1

{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
Expand Down Expand Up @@ -245,8 +248,9 @@ func init() {
{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"}, // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"}, // Likewise, but only ME and SH are valid. MB is always 0.

{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"}, // count leading zeros
{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"}, // count leading zeros (32 bit)

{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
Expand Down Expand Up @@ -285,34 +289,37 @@ func init() {
{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register

{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, clobberFlags: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0|arg1
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0|^arg1
{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0|arg1 sets CC
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0|arg1)
{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, clobberFlags: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"}, // reversebytes64(arg0)
{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"}, // reversebytes32(arg0)
{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"}, // reversebytes16(arg0)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"}, // round(arg0), float64
{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64
{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64
{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64

{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true, typ: "(Int,Flags)"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1
{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"}, // arg0&^arg1 sets CC
{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0|arg1
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0|^arg1
{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"}, // arg0|arg1 sets CC
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0|arg1)
{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"}, // ^(arg0|arg1) sets CC
{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true}, // arg0^arg1
{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"}, // arg0^arg1 sets CC
{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true}, // arg0^^arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 (integer)
{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"}, // -arg0 (integer) sets CC
{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"}, // reversebytes64(arg0)
{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"}, // reversebytes32(arg0)
{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"}, // reversebytes16(arg0)
{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"}, // -arg0 (floating point)
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0) (floating point)
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0) (floating point, single precision)
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"}, // round(arg0), float64
{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64
{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64
{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64

{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux
{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.

{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"}, // sign extend int8 to int64
{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
Expand Down
17 changes: 17 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,20 @@

// When PCRel is supported, paddi can add a 34b signed constant in one instruction.
(ADD (MOVDconst [m]) x) && supportsPPC64PCRel() && (m<<30)>>30 == m => (ADDconst [m] x)


// Where possible and practical, generate CC opcodes. Due to the structure of the rules, there are limits to how
// a Value can be rewritten which make it impossible to correctly rewrite sibling Value users. To workaround this
// case, candidates for CC opcodes are converted in two steps:
// 1. Convert all (x (Op ...) ...) into (x (Select0 (OpCC ...) ...). See convertPPC64OpToOpCC for more
// detail on how and why this is done there.
// 2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...))
// Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if
// both ops are in the same block.
(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
(CMPconst [0] z:((NEG|CNTLZD) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
// Note: ADDCCconst only assembles to 1 instruction for int16 constants.
(CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
// And finally, fixup the flag user.
(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR)CC x y))) => (Select1 <t> z)
(CMPconst <t> [0] (Select0 z:((ADDCCconst|NEGCC|CNTLZDCC) y))) => (Select1 <t> z)
Loading

0 comments on commit 773039e

Please sign in to comment.