cmd/compile: clean up ctz ops

Now that we have ops that can return two results, have BSF return both a result
and flags.  We can then get rid of the redundant comparison and use CMOV ops
instead of the CMOVconst ops.

Get rid of a bunch of ops we don't use: Ctz{8,16}, all of the Clzs,
and the CMOVNEs.  I don't think we'll ever use them, and they would be easy
to add back if needed.

Change-Id: I8858a1d017903474ea7e4002fc76a6a86e7bd487
Reviewed-on: https://go-review.googlesource.com/27630
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Keith Randall 2016-08-23 10:43:47 -07:00
parent 6394eb378e
commit 3e270ab80b
12 changed files with 71 additions and 650 deletions
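
For readers less familiar with the SSA backend, here is a rough, standalone Go model of what the new lowering computes (illustration only, not compiler code). BSF is now modeled as a single op with two results, the bit index and the flags, and CMOVQEQ selects the constant exactly when those flags encode "equal" (the input was zero), which is why the old CMPQconst was redundant.

package main

import "fmt"

// bsfq models BSFQ as the SSA backend now sees it: one op, two results.
// The flags result encodes "equal" exactly when the input was zero.
func bsfq(x uint64) (idx uint64, zero bool) {
	if x == 0 {
		return 0, true // real hardware leaves idx undefined here
	}
	for x&1 == 0 {
		idx++
		x >>= 1
	}
	return idx, false
}

// ctz64 mirrors the new rewrite rule
//   (Ctz64 <t> x) -> (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x)))
// CMOVQEQ returns its second argument when the flags encode "equal".
func ctz64(x uint64) uint64 {
	idx, zero := bsfq(x) // Select0 and Select1 of the same BSFQ
	if zero {
		return 64 // the MOVQconst [64] operand wins
	}
	return idx
}

func main() {
	for _, x := range []uint64{0, 1, 6, 1 << 63} {
		fmt.Printf("ctz64(%#x) = %d\n", x, ctz64(x))
	}
}

The old lowering computed the same selection but re-tested x with a CMPQconst (even though BSF had already set the flags) and routed the constant through AX via the CMOVconst ops; both steps disappear in the diffs below.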


@@ -415,23 +415,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
-case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
-ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
+case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
r := gc.SSARegNum(v)
if r != gc.SSARegNum(v.Args[0]) {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
-// Constant into AX
-p := gc.Prog(moveByType(v.Type))
-p.From.Type = obj.TYPE_CONST
-p.From.Offset = v.AuxInt
-p.To.Type = obj.TYPE_REG
-p.To.Reg = x86.REG_AX
-p = gc.Prog(v.Op.Asm())
+p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
-p.From.Reg = x86.REG_AX
+p.From.Reg = gc.SSARegNum(v.Args[1])
p.To.Type = obj.TYPE_REG
p.To.Reg = r
@@ -846,9 +837,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p := gc.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = r
-case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
-ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
-ssa.OpAMD64SQRTSD:
+case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
+p := gc.Prog(v.Op.Asm())
+p.From.Type = obj.TYPE_REG
+p.From.Reg = gc.SSARegNum(v.Args[0])
+p.To.Type = obj.TYPE_REG
+p.To.Reg = gc.SSARegNum0(v)
+case ssa.OpAMD64SQRTSD:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = gc.SSARegNum(v.Args[0])


@@ -2536,7 +2536,7 @@ func isSSAIntrinsic1(s *Sym) bool {
if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
switch s.Name {
case
-"Ctz64", "Ctz32", "Ctz16",
+"Ctz64", "Ctz32",
"Bswap64", "Bswap32":
return true
}
@@ -2569,8 +2569,6 @@ func (s *state) intrinsicCall1(n *Node) *ssa.Value {
result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Ctz32":
result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
-case "Ctz16":
-result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
case "Bswap64":
result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
case "Bswap32":


@@ -98,10 +98,8 @@
(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)
// Lowering other arithmetic
-// TODO: CMPQconst 0 below is redundant because BSF sets Z but how to remove?
-(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
-(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
-(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
+(Ctz32 <t> x) -> (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
(Bswap64 x) -> (BSWAPQ x)
(Bswap32 x) -> (BSWAPL x)
@@ -1282,31 +1280,6 @@
(CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x)
-// Optimizing conditional moves
-(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
-(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
-(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
-(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
-(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
-(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
-(CMOVQEQconst x (FlagLT_ULT)) -> x
-(CMOVLEQconst x (FlagLT_ULT)) -> x
-(CMOVWEQconst x (FlagLT_ULT)) -> x
-(CMOVQEQconst x (FlagLT_UGT)) -> x
-(CMOVLEQconst x (FlagLT_UGT)) -> x
-(CMOVWEQconst x (FlagLT_UGT)) -> x
-(CMOVQEQconst x (FlagGT_ULT)) -> x
-(CMOVLEQconst x (FlagGT_ULT)) -> x
-(CMOVWEQconst x (FlagGT_ULT)) -> x
-(CMOVQEQconst x (FlagGT_UGT)) -> x
-(CMOVLEQconst x (FlagGT_UGT)) -> x
-(CMOVWEQconst x (FlagGT_UGT)) -> x
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.


@@ -122,8 +122,7 @@ func init() {
gp1flags = regInfo{inputs: []regMask{gpsp}}
flagsgp = regInfo{inputs: nil, outputs: gponly}
-// for CMOVconst -- uses AX to hold constant temporary.
-gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax}, clobbers: ax, outputs: []regMask{gp &^ ax}}
+gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
@@ -285,21 +284,16 @@ func init() {
{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true, clobberFlags: true}, // ^arg0
{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true, clobberFlags: true}, // ^arg0
-{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
-{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
-{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
+// BSF{L,Q} returns a tuple [result, flags]
+// result is undefined if the input is zero.
+// flags are set to "equal" if the input is zero, "not equal" otherwise.
+{name: "BSFQ", argLength: 1, reg: gp11flags, asm: "BSFQ", typ: "(UInt64,Flags)"}, // # of low-order zeroes in 64-bit arg
+{name: "BSFL", argLength: 1, reg: gp11flags, asm: "BSFL", typ: "(UInt32,Flags)"}, // # of low-order zeroes in 32-bit arg
// Note ASM for ops moves whole register
-{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z set
-{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
-{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true, clobberFlags: true}, // replace arg0 w/ constant if Z not set
+//
+{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
+{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes


@@ -257,14 +257,9 @@ var genericOps = []opData{
{name: "Com32", argLength: 1},
{name: "Com64", argLength: 1},
-{name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
-{name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
+{name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32)
{name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
-{name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
-{name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
-{name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
{name: "Bswap32", argLength: 1}, // Swap bytes
{name: "Bswap64", argLength: 1}, // Swap bytes


@@ -479,16 +479,8 @@ const (
OpAMD64NOTL
OpAMD64BSFQ
OpAMD64BSFL
-OpAMD64BSFW
-OpAMD64BSRQ
-OpAMD64BSRL
-OpAMD64BSRW
-OpAMD64CMOVQEQconst
-OpAMD64CMOVLEQconst
-OpAMD64CMOVWEQconst
-OpAMD64CMOVQNEconst
-OpAMD64CMOVLNEconst
-OpAMD64CMOVWNEconst
+OpAMD64CMOVQEQ
+OpAMD64CMOVLEQ
OpAMD64BSWAPQ
OpAMD64BSWAPL
OpAMD64SQRTSD
@@ -1378,12 +1370,8 @@ const (
OpCom16
OpCom32
OpCom64
-OpCtz16
OpCtz32
OpCtz64
-OpClz16
-OpClz32
-OpClz64
OpBswap32
OpBswap64
OpSqrt
@@ -5487,188 +5475,60 @@ var opcodeTable = [...]opInfo{
},
},
-{
-name: "BSFQ",
-argLen: 1,
-clobberFlags: true,
-asm: x86.ABSFQ,
-reg: regInfo{
-inputs: []inputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-outputs: []outputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-},
-},
-{
-name: "BSFL",
-argLen: 1,
-clobberFlags: true,
-asm: x86.ABSFL,
-reg: regInfo{
-inputs: []inputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-outputs: []outputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-},
-},
-{
-name: "BSFW",
-argLen: 1,
-clobberFlags: true,
-asm: x86.ABSFW,
-reg: regInfo{
-inputs: []inputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-outputs: []outputInfo{
-{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-},
-},
+{
+name: "BSFQ",
+argLen: 1,
+asm: x86.ABSFQ,
+reg: regInfo{
+inputs: []inputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+outputs: []outputInfo{
+{1, 0},
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+},
+},
+{
+name: "BSFL",
+argLen: 1,
+asm: x86.ABSFL,
+reg: regInfo{
+inputs: []inputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+outputs: []outputInfo{
+{1, 0},
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+},
+},
{
name: "BSRQ",
argLen: 1,
clobberFlags: true,
asm: x86.ABSRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BSRL",
argLen: 1,
clobberFlags: true,
asm: x86.ABSRL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BSRW",
argLen: 1,
clobberFlags: true,
asm: x86.ABSRW,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
-{
-name: "CMOVQEQconst",
-auxType: auxInt64,
-argLen: 2,
-resultInArg0: true,
-clobberFlags: true,
-asm: x86.ACMOVQEQ,
-reg: regInfo{
-inputs: []inputInfo{
-{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-clobbers: 1, // AX
-outputs: []outputInfo{
-{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-},
-},
-{
-name: "CMOVLEQconst",
-auxType: auxInt32,
-argLen: 2,
-resultInArg0: true,
-clobberFlags: true,
-asm: x86.ACMOVLEQ,
-reg: regInfo{
-inputs: []inputInfo{
-{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-clobbers: 1, // AX
-outputs: []outputInfo{
-{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
-},
-},
-},
+{
+name: "CMOVQEQ",
+argLen: 3,
+resultInArg0: true,
+asm: x86.ACMOVQEQ,
+reg: regInfo{
+inputs: []inputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+outputs: []outputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+},
+},
+{
+name: "CMOVLEQ",
+argLen: 3,
+resultInArg0: true,
+asm: x86.ACMOVLEQ,
+reg: regInfo{
+inputs: []inputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+outputs: []outputInfo{
+{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+},
+},
+},
{
name: "CMOVWEQconst",
auxType: auxInt16,
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ACMOVLEQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVQNEconst",
auxType: auxInt64,
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVLNEconst",
auxType: auxInt32,
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "CMOVWNEconst",
auxType: auxInt16,
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
}, },
}, },
}, },
@@ -15637,11 +15497,6 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
-{
-name: "Ctz16",
-argLen: 1,
-generic: true,
-},
{
name: "Ctz32",
argLen: 1,
@@ -15652,21 +15507,6 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
-{
-name: "Clz16",
-argLen: 1,
-generic: true,
-},
-{
-name: "Clz32",
-argLen: 1,
-generic: true,
-},
-{
-name: "Clz64",
-argLen: 1,
-generic: true,
-},
{
name: "Bswap32",
argLen: 1,
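
The numeric register masks in the generated table above are bitmasks over the backend's AMD64 register numbering. A quick standalone way to sanity-check the comments, assuming the usual ordering AX, CX, DX, BX, SP, BP, SI, DI, R8..R15 from AMD64Ops.go (a helper sketch, not compiler code):

package main

import "fmt"

var regNames = []string{
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
}

// decode lists the general-purpose registers selected by the low bits of a mask.
func decode(mask uint64) []string {
	var rs []string
	for i, name := range regNames {
		if mask&(1<<uint(i)) != 0 {
			rs = append(rs, name)
		}
	}
	return rs
}

func main() {
	fmt.Println(decode(65519)) // every GP register except SP
	fmt.Println(decode(65518)) // additionally drops AX (the old CMOVconst constraint)
	fmt.Println(decode(1))     // just AX (the old "clobbers: 1" annotation)
	fmt.Println(decode(0))     // empty: the new {1, 0} output row, i.e. the flags result, uses no register
}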


@@ -24,12 +24,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
return rewriteValueAMD64_OpAMD64ANDQ(v, config)
case OpAMD64ANDQconst:
return rewriteValueAMD64_OpAMD64ANDQconst(v, config)
-case OpAMD64CMOVLEQconst:
-return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config)
-case OpAMD64CMOVQEQconst:
-return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config)
-case OpAMD64CMOVWEQconst:
-return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config)
case OpAMD64CMPB:
return rewriteValueAMD64_OpAMD64CMPB(v, config)
case OpAMD64CMPBconst:
@@ -330,8 +324,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
return rewriteValueAMD64_OpConstNil(v, config)
case OpConvert:
return rewriteValueAMD64_OpConvert(v, config)
-case OpCtz16:
-return rewriteValueAMD64_OpCtz16(v, config)
case OpCtz32:
return rewriteValueAMD64_OpCtz32(v, config)
case OpCtz64:
@@ -1556,279 +1548,6 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (CMOVLEQconst x (InvertFlags y) [c])
// cond:
// result: (CMOVLNEconst x y [c])
for {
c := v.AuxInt
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64InvertFlags {
break
}
y := v_1.Args[0]
v.reset(OpAMD64CMOVLNEconst)
v.AuxInt = c
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (CMOVLEQconst _ (FlagEQ) [c])
// cond:
// result: (Const32 [c])
for {
c := v.AuxInt
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagEQ {
break
}
v.reset(OpConst32)
v.AuxInt = c
return true
}
// match: (CMOVLEQconst x (FlagLT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVLEQconst x (FlagLT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVLEQconst x (FlagGT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVLEQconst x (FlagGT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (CMOVQEQconst x (InvertFlags y) [c])
// cond:
// result: (CMOVQNEconst x y [c])
for {
c := v.AuxInt
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64InvertFlags {
break
}
y := v_1.Args[0]
v.reset(OpAMD64CMOVQNEconst)
v.AuxInt = c
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (CMOVQEQconst _ (FlagEQ) [c])
// cond:
// result: (Const64 [c])
for {
c := v.AuxInt
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagEQ {
break
}
v.reset(OpConst64)
v.AuxInt = c
return true
}
// match: (CMOVQEQconst x (FlagLT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVQEQconst x (FlagLT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVQEQconst x (FlagGT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVQEQconst x (FlagGT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (CMOVWEQconst x (InvertFlags y) [c])
// cond:
// result: (CMOVWNEconst x y [c])
for {
c := v.AuxInt
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64InvertFlags {
break
}
y := v_1.Args[0]
v.reset(OpAMD64CMOVWNEconst)
v.AuxInt = c
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (CMOVWEQconst _ (FlagEQ) [c])
// cond:
// result: (Const16 [c])
for {
c := v.AuxInt
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagEQ {
break
}
v.reset(OpConst16)
v.AuxInt = c
return true
}
// match: (CMOVWEQconst x (FlagLT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVWEQconst x (FlagLT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagLT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVWEQconst x (FlagGT_ULT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_ULT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
// match: (CMOVWEQconst x (FlagGT_UGT))
// cond:
// result: x
for {
x := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpAMD64FlagGT_UGT {
break
}
v.reset(OpCopy)
v.Type = x.Type
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -13633,45 +13352,29 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool {
}
return false
}
func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Ctz16 <t> x)
// cond:
// result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
for {
t := v.Type
x := v.Args[0]
v.reset(OpAMD64CMOVWEQconst)
v.AuxInt = 16
v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
v0.AddArg(x)
v.AddArg(v0)
v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
v1.AuxInt = 0
v1.AddArg(x)
v.AddArg(v1)
return true
}
}
func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (Ctz32 <t> x)
// cond:
-// result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+// result: (CMOVLEQ (Select0 <t> (BSFL x)) (MOVLconst <t> [32]) (Select1 <TypeFlags> (BSFL x)))
for {
t := v.Type
x := v.Args[0]
-v.reset(OpAMD64CMOVLEQconst)
-v.AuxInt = 32
-v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
-v0.AddArg(x)
-v.AddArg(v0)
-v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
-v1.AuxInt = 0
+v.reset(OpAMD64CMOVLEQ)
+v0 := b.NewValue0(v.Line, OpSelect0, t)
+v1 := b.NewValue0(v.Line, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
v1.AddArg(x)
-v.AddArg(v1)
+v0.AddArg(v1)
+v.AddArg(v0)
+v2 := b.NewValue0(v.Line, OpAMD64MOVLconst, t)
+v2.AuxInt = 32
+v.AddArg(v2)
+v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+v4 := b.NewValue0(v.Line, OpAMD64BSFL, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
+v4.AddArg(x)
+v3.AddArg(v4)
+v.AddArg(v3)
return true
}
}
@@ -13680,19 +13383,24 @@ func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool {
_ = b
// match: (Ctz64 <t> x)
// cond:
-// result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+// result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
for {
t := v.Type
x := v.Args[0]
-v.reset(OpAMD64CMOVQEQconst)
-v.AuxInt = 64
-v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
-v0.AddArg(x)
-v.AddArg(v0)
-v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
-v1.AuxInt = 0
+v.reset(OpAMD64CMOVQEQ)
+v0 := b.NewValue0(v.Line, OpSelect0, t)
+v1 := b.NewValue0(v.Line, OpAMD64BSFQ, MakeTuple(config.fe.TypeUInt64(), TypeFlags))
v1.AddArg(x)
-v.AddArg(v1)
+v0.AddArg(v1)
+v.AddArg(v0)
+v2 := b.NewValue0(v.Line, OpAMD64MOVQconst, t)
+v2.AuxInt = 64
+v.AddArg(v2)
+v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+v4 := b.NewValue0(v.Line, OpAMD64BSFQ, MakeTuple(config.fe.TypeUInt64(), TypeFlags))
+v4.AddArg(x)
+v3.AddArg(v4)
+v.AddArg(v3)
return true
}
}


@@ -30,19 +30,6 @@ var deBruijnIdx32 = [32]byte{
30, 9, 19, 24, 29, 18, 28, 27,
}
-const deBruijn16 = 0x09af
-var deBruijnIdx16 = [16]byte{
-0, 1, 2, 5, 3, 9, 6, 11,
-15, 4, 8, 10, 14, 7, 13, 12,
-}
-const deBruijn8 = 0x17
-var deBruijnIdx8 = [8]byte{
-0, 1, 2, 4, 7, 3, 6, 5,
-}
// Ctz64 counts trailing (low-order) zeroes,
// and if all are zero, then 64.
func Ctz64(x uint64) uint64 {
@@ -63,26 +50,6 @@ func Ctz32(x uint32) uint32 {
return y + z
}
-// Ctz16 counts trailing (low-order) zeroes,
-// and if all are zero, then 16.
-func Ctz16(x uint16) uint16 {
-x &= -x // isolate low-order bit
-y := x * deBruijn16 >> 12 // extract part of deBruijn sequence
-y = uint16(deBruijnIdx16[y]) // convert to bit index
-z := (x - 1) >> 11 & 16 // adjustment if zero
-return y + z
-}
-// Ctz8 counts trailing (low-order) zeroes,
-// and if all are zero, then 8.
-func Ctz8(x uint8) uint8 {
-x &= -x // isolate low-order bit
-y := x * deBruijn8 >> 5 // extract part of deBruijn sequence
-y = uint8(deBruijnIdx8[y]) // convert to bit index
-z := (x - 1) >> 4 & 8 // adjustment if zero
-return y + z
-}
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
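
The helpers deleted above are a compact illustration of the de Bruijn technique that the remaining pure-Go Ctz32 and Ctz64 fallbacks still use: isolate the lowest set bit, multiply by a de Bruijn constant so the high bits uniquely identify the bit position, index a small table, and add a correction term that yields the operand width when the input was zero. Here is the removed 16-bit variant reproduced as a standalone program, with a brute-force check added purely for illustration:

package main

import "fmt"

const deBruijn16 = 0x09af

var deBruijnIdx16 = [16]byte{
	0, 1, 2, 5, 3, 9, 6, 11,
	15, 4, 8, 10, 14, 7, 13, 12,
}

// ctz16 reproduces the removed sys.Ctz16.
func ctz16(x uint16) uint16 {
	x &= -x                      // isolate low-order bit
	y := x * deBruijn16 >> 12    // extract part of deBruijn sequence
	y = uint16(deBruijnIdx16[y]) // convert to bit index
	z := (x - 1) >> 11 & 16      // adjustment if zero
	return y + z
}

// naive is a reference count used only to verify the table-driven version.
func naive(x uint16) uint16 {
	if x == 0 {
		return 16
	}
	var n uint16
	for x&1 == 0 {
		n++
		x >>= 1
	}
	return n
}

func main() {
	for i := 0; i < 1<<16; i++ {
		if x := uint16(i); ctz16(x) != naive(x) {
			fmt.Printf("mismatch at %#x\n", x)
			return
		}
	}
	fmt.Println("de Bruijn Ctz16 agrees with the naive count for all 16-bit inputs")
}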


@@ -36,22 +36,6 @@ TEXT runtime∕internal∕sys·Ctz32(SB), NOSPLIT, $0-8
MOVL AX, ret+4(FP)
RET
-TEXT runtime∕internal∕sys·Ctz16(SB), NOSPLIT, $0-6
-MOVW x+0(FP), AX
-BSFW AX, AX
-JNZ 2(PC)
-MOVW $16, AX
-MOVW AX, ret+4(FP)
-RET
-TEXT runtime∕internal∕sys·Ctz8(SB), NOSPLIT, $0-5
-MOVBLZX x+0(FP), AX
-BSFL AX, AX
-JNZ 2(PC)
-MOVB $8, AX
-MOVB AX, ret+4(FP)
-RET
TEXT runtime∕internal∕sys·Bswap64(SB), NOSPLIT, $0-16
MOVL x_lo+0(FP), AX
MOVL x_hi+4(FP), BX


@@ -8,7 +8,5 @@ package sys
func Ctz64(x uint64) uint64
func Ctz32(x uint32) uint32
-func Ctz16(x uint16) uint16
-func Ctz8(x uint8) uint8
func Bswap64(x uint64) uint64
func Bswap32(x uint32) uint32


@@ -21,22 +21,6 @@ func TestCtz32(t *testing.T) {
}
}
}
-func TestCtz16(t *testing.T) {
-for i := uint(0); i <= 16; i++ {
-x := uint16(5) << i
-if got := sys.Ctz16(x); got != uint16(i) {
-t.Errorf("Ctz16(%d)=%d, want %d", x, got, i)
-}
-}
-}
-func TestCtz8(t *testing.T) {
-for i := uint(0); i <= 8; i++ {
-x := uint8(5) << i
-if got := sys.Ctz8(x); got != uint8(i) {
-t.Errorf("Ctz8(%d)=%d, want %d", x, got, i)
-}
-}
-}
func TestBswap64(t *testing.T) {
x := uint64(0x1122334455667788)
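
One detail of these tests worth noting (the deleted TestCtz16 above and the surviving TestCtz32/TestCtz64 share it): the loop bound is i <= width rather than i < width, so the final iteration shifts the pattern entirely out of the word and exercises the documented "all zeros returns the width" case. A tiny standalone illustration:

package main

import "fmt"

func main() {
	// As i grows, the shifted value loses its high bits and finally becomes
	// zero at i == 16, the input whose count-trailing-zeros result is defined
	// to be the operand width.
	for _, i := range []uint{14, 15, 16} {
		fmt.Printf("uint16(5)<<%d = %#x\n", i, uint16(5)<<i)
	}
}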


@@ -45,18 +45,6 @@ func test(i, x uint64) {
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
}
}
-if i <= 16 {
-x16 := uint16(x)
-t16 := T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
-if uint16(i) != t16 {
-logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
-}
-x16 = -x16
-t16 = T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
-if uint16(i) != t16 {
-logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
-}
-}
}

func main() {
@@ -88,9 +76,6 @@ func main() {
}

// Zero is a special case, be sure it is done right.
-if T.Ctz16(0) != 16 { // ERROR "intrinsic substitution for Ctz16"
-logf("ctz16(0) != 16")
-}
if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32"
logf("ctz32(0) != 32")