Skip to content

Commit

Permalink
[dev.ssa] cmd/compile: use 2-result divide op
Browse files Browse the repository at this point in the history
SSA Values are now allowed to have 2 outputs. Use that ability for amd64.
This allows x, y := a/b, a%b to be compiled to just a single divide instruction.

Update #6815

Change-Id: Id70bcd20188a2dd8445e631a11d11f60991921e4
Reviewed-on: https://go-review.googlesource.com/25004
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: David Chase <drchase@google.com>
  • Loading branch information
randall77 committed Jul 18, 2016
1 parent 25e0a36 commit cf92e38
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 285 deletions.
140 changes: 70 additions & 70 deletions src/cmd/compile/internal/amd64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,89 +209,87 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
}
opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:
case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
// Arg[0] (the dividend) is in AX.
// Arg[1] (the divisor) can be in any other register.
// Result[0] (the quotient) is in AX.
// Result[1] (the remainder) is in DX.
r := gc.SSARegNum(v.Args[1])

// Arg[0] is already in AX as it's the only register we allow
// and AX is the only output
x := gc.SSARegNum(v.Args[1])

// CPU faults upon signed overflow, which occurs when most
// negative int is divided by -1.
var j *obj.Prog
if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

var c *obj.Prog
switch v.Op {
case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
c = gc.Prog(x86.ACMPQ)
j = gc.Prog(x86.AJEQ)
// go ahead and sign extend to save doing it later
gc.Prog(x86.ACQO)
// Zero extend dividend.
c := gc.Prog(x86.AXORL)
c.From.Type = obj.TYPE_REG
c.From.Reg = x86.REG_DX
c.To.Type = obj.TYPE_REG
c.To.Reg = x86.REG_DX

case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
c = gc.Prog(x86.ACMPL)
j = gc.Prog(x86.AJEQ)
gc.Prog(x86.ACDQ)

case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
c = gc.Prog(x86.ACMPW)
j = gc.Prog(x86.AJEQ)
gc.Prog(x86.ACWD)
}
c.From.Type = obj.TYPE_REG
c.From.Reg = x
c.To.Type = obj.TYPE_CONST
c.To.Offset = -1
// Issue divide.
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r

j.To.Type = obj.TYPE_BRANCH
case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
// Arg[0] (the dividend) is in AX.
// Arg[1] (the divisor) can be in any other register.
// Result[0] (the quotient) is in AX.
// Result[1] (the remainder) is in DX.
r := gc.SSARegNum(v.Args[1])

// CPU faults upon signed overflow, which occurs when the most
// negative int is divided by -1. Handle divide by -1 as a special case.
var c *obj.Prog
switch v.Op {
case ssa.OpAMD64DIVQ:
c = gc.Prog(x86.ACMPQ)
case ssa.OpAMD64DIVL:
c = gc.Prog(x86.ACMPL)
case ssa.OpAMD64DIVW:
c = gc.Prog(x86.ACMPW)
}
c.From.Type = obj.TYPE_REG
c.From.Reg = r
c.To.Type = obj.TYPE_CONST
c.To.Offset = -1
j1 := gc.Prog(x86.AJEQ)
j1.To.Type = obj.TYPE_BRANCH

// for unsigned ints, we sign extend by setting DX = 0
// signed ints were sign extended above
if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
c := gc.Prog(x86.AXORQ)
c.From.Type = obj.TYPE_REG
c.From.Reg = x86.REG_DX
c.To.Type = obj.TYPE_REG
c.To.Reg = x86.REG_DX
// Sign extend dividend.
switch v.Op {
case ssa.OpAMD64DIVQ:
gc.Prog(x86.ACQO)
case ssa.OpAMD64DIVL:
gc.Prog(x86.ACDQ)
case ssa.OpAMD64DIVW:
gc.Prog(x86.ACWD)
}

// Issue divide.
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.From.Reg = r

// signed division, rest of the check for -1 case
if j != nil {
j2 := gc.Prog(obj.AJMP)
j2.To.Type = obj.TYPE_BRANCH
// Skip over -1 fixup code.
j2 := gc.Prog(obj.AJMP)
j2.To.Type = obj.TYPE_BRANCH

var n *obj.Prog
if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
v.Op == ssa.OpAMD64DIVW {
// n * -1 = -n
n = gc.Prog(x86.ANEGQ)
n.To.Type = obj.TYPE_REG
n.To.Reg = x86.REG_AX
} else {
// n % -1 == 0
n = gc.Prog(x86.AXORQ)
n.From.Type = obj.TYPE_REG
n.From.Reg = x86.REG_DX
n.To.Type = obj.TYPE_REG
n.To.Reg = x86.REG_DX
}
// Issue -1 fixup code.
// n / -1 = -n
n1 := gc.Prog(x86.ANEGQ)
n1.To.Type = obj.TYPE_REG
n1.To.Reg = x86.REG_AX

j.To.Val = n
j2.To.Val = s.Pc()
}
// n % -1 == 0
n2 := gc.Prog(x86.AXORL)
n2.From.Type = obj.TYPE_REG
n2.From.Reg = x86.REG_DX
n2.To.Type = obj.TYPE_REG
n2.To.Reg = x86.REG_DX

// TODO(khr): issue only the -1 fixup code we need.
// For instance, if only the quotient is used, no point in zeroing the remainder.

j1.To.Val = n1
j2.To.Val = s.Pc()

case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
Expand Down Expand Up @@ -818,6 +816,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = gc.SSARegNum(v)
case ssa.OpSP, ssa.OpSB:
// nothing to do
case ssa.OpSelect0, ssa.OpSelect1:
// nothing to do
case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
Expand Down
32 changes: 16 additions & 16 deletions src/cmd/compile/internal/ssa/gen/AMD64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@
(Div32F x y) -> (DIVSS x y)
(Div64F x y) -> (DIVSD x y)

(Div64 x y) -> (DIVQ x y)
(Div64u x y) -> (DIVQU x y)
(Div32 x y) -> (DIVL x y)
(Div32u x y) -> (DIVLU x y)
(Div16 x y) -> (DIVW x y)
(Div16u x y) -> (DIVWU x y)
(Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y))
(Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
(Div64 x y) -> (Select0 (DIVQ x y <&TupleType{config.Frontend().TypeInt64(), config.Frontend().TypeInt64()}>))
(Div64u x y) -> (Select0 (DIVQU x y <&TupleType{config.Frontend().TypeUInt64(), config.Frontend().TypeUInt64()}>))
(Div32 x y) -> (Select0 (DIVL x y <&TupleType{config.Frontend().TypeInt32(), config.Frontend().TypeInt32()}>))
(Div32u x y) -> (Select0 (DIVLU x y <&TupleType{config.Frontend().TypeUInt32(), config.Frontend().TypeUInt32()}>))
(Div16 x y) -> (Select0 (DIVW x y <&TupleType{config.Frontend().TypeInt16(), config.Frontend().TypeInt16()}>))
(Div16u x y) -> (Select0 (DIVWU x y <&TupleType{config.Frontend().TypeUInt16(), config.Frontend().TypeUInt16()}>))
(Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y) <&TupleType{config.Frontend().TypeInt8(), config.Frontend().TypeInt8()}>))
(Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y) <&TupleType{config.Frontend().TypeUInt8(), config.Frontend().TypeUInt8()}>))

(Hmul64 x y) -> (HMULQ x y)
(Hmul64u x y) -> (HMULQU x y)
Expand All @@ -49,14 +49,14 @@

(Avg64u x y) -> (AVGQU x y)

(Mod64 x y) -> (MODQ x y)
(Mod64u x y) -> (MODQU x y)
(Mod32 x y) -> (MODL x y)
(Mod32u x y) -> (MODLU x y)
(Mod16 x y) -> (MODW x y)
(Mod16u x y) -> (MODWU x y)
(Mod8 x y) -> (MODW (SignExt8to16 x) (SignExt8to16 y))
(Mod8u x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
(Mod64 x y) -> (Select1 (DIVQ x y <&TupleType{config.Frontend().TypeInt64(), config.Frontend().TypeInt64()}>))
(Mod64u x y) -> (Select1 (DIVQU x y <&TupleType{config.Frontend().TypeUInt64(), config.Frontend().TypeUInt64()}>))
(Mod32 x y) -> (Select1 (DIVL x y <&TupleType{config.Frontend().TypeInt32(), config.Frontend().TypeInt32()}>))
(Mod32u x y) -> (Select1 (DIVLU x y <&TupleType{config.Frontend().TypeUInt32(), config.Frontend().TypeUInt32()}>))
(Mod16 x y) -> (Select1 (DIVW x y <&TupleType{config.Frontend().TypeInt16(), config.Frontend().TypeInt16()}>))
(Mod16u x y) -> (Select1 (DIVWU x y <&TupleType{config.Frontend().TypeUInt16(), config.Frontend().TypeUInt16()}>))
(Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y) <&TupleType{config.Frontend().TypeInt8(), config.Frontend().TypeInt8()}>))
(Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y) <&TupleType{config.Frontend().TypeUInt8(), config.Frontend().TypeUInt8()}>))

(And64 x y) -> (ANDQ x y)
(And32 x y) -> (ANDL x y)
Expand Down
25 changes: 8 additions & 17 deletions src/cmd/compile/internal/ssa/gen/AMD64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,10 @@ func init() {
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
clobbers: dx | flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx},
clobbers: flags}
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
clobbers: ax | flags}
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
clobbers: ax | flags}

gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
Expand Down Expand Up @@ -214,19 +212,12 @@ func init() {

{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits

{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // arg0 / arg1
{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // arg0 / arg1

{name: "MODQ", argLength: 2, reg: gp11mod, asm: "IDIVQ"}, // arg0 % arg1
{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL"}, // arg0 % arg1
{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW"}, // arg0 % arg1
{name: "MODQU", argLength: 2, reg: gp11mod, asm: "DIVQ"}, // arg0 % arg1
{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL"}, // arg0 % arg1
{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW"}, // arg0 % arg1
{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // [arg0 / arg1, arg0 % arg1]
{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // [arg0 / arg1, arg0 % arg1]
{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // [arg0 / arg1, arg0 % arg1]
{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // [arg0 / arg1, arg0 % arg1]
{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // [arg0 / arg1, arg0 % arg1]
{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // [arg0 / arg1, arg0 % arg1]

{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true}, // arg0 & arg1
{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1
Expand Down
Loading

0 comments on commit cf92e38

Please sign in to comment.