mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 16:50:58 +00:00 
			
		
		
		
	cmd/compile: intrinsify TrailingZeros{8,32,64} for 386
This CL adds support for intrinsifying the TrailingZeros{8,32,64}
functions for the 386 architecture. We need to handle the case when the input
is 0, which could lead to undefined output from the BSFL instruction.
The next CL will remove the assembly code in the runtime/internal/sys package.
Change-Id: Ic168edf68e81bf69a536102100fdd3f56f0f4a1b
Reviewed-on: https://go-review.googlesource.com/c/go/+/475735
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
			
			
This commit is contained in:
		
							parent
							
								
									82bf12902f
								
							
						
					
					
						commit
						cedfcba3e8
					
				
					 7 changed files with 77 additions and 3 deletions
				
			
		|  | @ -56,8 +56,12 @@ | ||||||
| (Sqrt ...) => (SQRTSD ...) | (Sqrt ...) => (SQRTSD ...) | ||||||
| (Sqrt32 ...) => (SQRTSS ...) | (Sqrt32 ...) => (SQRTSS ...) | ||||||
| 
 | 
 | ||||||
|  | (Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [0x100] x)) | ||||||
|  | (Ctz8NonZero ...) => (BSFL ...) | ||||||
| (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [0x10000] x)) | (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [0x10000] x)) | ||||||
| (Ctz16NonZero ...) => (BSFL ...) | (Ctz16NonZero ...) => (BSFL ...) | ||||||
|  | (Ctz32 ...) => (LoweredCtz32 ...) | ||||||
|  | (Ctz32NonZero ...) => (BSFL ...) | ||||||
| 
 | 
 | ||||||
| // Lowering extension | // Lowering extension | ||||||
| (SignExt8to16  ...) => (MOVBLSX ...) | (SignExt8to16  ...) => (MOVBLSX ...) | ||||||
|  |  | ||||||
|  | @ -302,6 +302,7 @@ func init() { | ||||||
| 
 | 
 | ||||||
| 		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero | 		{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero | ||||||
| 		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero | 		{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero | ||||||
|  | 		{name: "LoweredCtz32", argLength: 1, reg: gp11, clobberFlags: true},      // arg0 # of low-order zeroes | ||||||
| 
 | 
 | ||||||
| 		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero | 		{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero | ||||||
| 		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero | 		{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero | ||||||
|  |  | ||||||
|  | @ -456,6 +456,7 @@ const ( | ||||||
| 	Op386NOTL | 	Op386NOTL | ||||||
| 	Op386BSFL | 	Op386BSFL | ||||||
| 	Op386BSFW | 	Op386BSFW | ||||||
|  | 	Op386LoweredCtz32 | ||||||
| 	Op386BSRL | 	Op386BSRL | ||||||
| 	Op386BSRW | 	Op386BSRW | ||||||
| 	Op386BSWAPL | 	Op386BSWAPL | ||||||
|  | @ -5034,6 +5035,20 @@ var opcodeTable = [...]opInfo{ | ||||||
| 			}, | 			}, | ||||||
| 		}, | 		}, | ||||||
| 	}, | 	}, | ||||||
|  | 	{ | ||||||
|  | 		name:         "LoweredCtz32", | ||||||
|  | 		argLen:       1, | ||||||
|  | 		clobberFlags: true, | ||||||
|  | 		asm:          x86.ABSFL, | ||||||
|  | 		reg: regInfo{ | ||||||
|  | 			inputs: []inputInfo{ | ||||||
|  | 				{0, 239}, // AX CX DX BX BP SI DI | ||||||
|  | 			}, | ||||||
|  | 			outputs: []outputInfo{ | ||||||
|  | 				{0, 239}, // AX CX DX BX BP SI DI | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	}, | ||||||
| 	{ | 	{ | ||||||
| 		name:         "BSRL", | 		name:         "BSRL", | ||||||
| 		argLen:       1, | 		argLen:       1, | ||||||
|  |  | ||||||
|  | @ -315,6 +315,17 @@ func rewriteValue386(v *Value) bool { | ||||||
| 	case OpCtz16NonZero: | 	case OpCtz16NonZero: | ||||||
| 		v.Op = Op386BSFL | 		v.Op = Op386BSFL | ||||||
| 		return true | 		return true | ||||||
|  | 	case OpCtz32: | ||||||
|  | 		v.Op = Op386LoweredCtz32 | ||||||
|  | 		return true | ||||||
|  | 	case OpCtz32NonZero: | ||||||
|  | 		v.Op = Op386BSFL | ||||||
|  | 		return true | ||||||
|  | 	case OpCtz8: | ||||||
|  | 		return rewriteValue386_OpCtz8(v) | ||||||
|  | 	case OpCtz8NonZero: | ||||||
|  | 		v.Op = Op386BSFL | ||||||
|  | 		return true | ||||||
| 	case OpCvt32Fto32: | 	case OpCvt32Fto32: | ||||||
| 		v.Op = Op386CVTTSS2SL | 		v.Op = Op386CVTTSS2SL | ||||||
| 		return true | 		return true | ||||||
|  | @ -8527,6 +8538,22 @@ func rewriteValue386_OpCtz16(v *Value) bool { | ||||||
| 		return true | 		return true | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | func rewriteValue386_OpCtz8(v *Value) bool { | ||||||
|  | 	v_0 := v.Args[0] | ||||||
|  | 	b := v.Block | ||||||
|  | 	typ := &b.Func.Config.Types | ||||||
|  | 	// match: (Ctz8 x) | ||||||
|  | 	// result: (BSFL (ORLconst <typ.UInt32> [0x100] x)) | ||||||
|  | 	for { | ||||||
|  | 		x := v_0 | ||||||
|  | 		v.reset(Op386BSFL) | ||||||
|  | 		v0 := b.NewValue0(v.Pos, Op386ORLconst, typ.UInt32) | ||||||
|  | 		v0.AuxInt = int32ToAuxInt(0x100) | ||||||
|  | 		v0.AddArg(x) | ||||||
|  | 		v.AddArg(v0) | ||||||
|  | 		return true | ||||||
|  | 	} | ||||||
|  | } | ||||||
| func rewriteValue386_OpDiv8(v *Value) bool { | func rewriteValue386_OpDiv8(v *Value) bool { | ||||||
| 	v_1 := v.Args[1] | 	v_1 := v.Args[1] | ||||||
| 	v_0 := v.Args[0] | 	v_0 := v.Args[0] | ||||||
|  |  | ||||||
|  | @ -4492,12 +4492,12 @@ func InitTables() { | ||||||
| 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | ||||||
| 			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0]) | 			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0]) | ||||||
| 		}, | 		}, | ||||||
| 		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) | 		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) | ||||||
| 	addF("math/bits", "TrailingZeros32", | 	addF("math/bits", "TrailingZeros32", | ||||||
| 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | ||||||
| 			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0]) | 			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0]) | ||||||
| 		}, | 		}, | ||||||
| 		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) | 		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) | ||||||
| 	addF("math/bits", "TrailingZeros16", | 	addF("math/bits", "TrailingZeros16", | ||||||
| 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | ||||||
| 			x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) | 			x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) | ||||||
|  | @ -4531,7 +4531,7 @@ func InitTables() { | ||||||
| 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | ||||||
| 			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) | 			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) | ||||||
| 		}, | 		}, | ||||||
| 		sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm) | 		sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm) | ||||||
| 	addF("math/bits", "TrailingZeros8", | 	addF("math/bits", "TrailingZeros8", | ||||||
| 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { | ||||||
| 			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) | 			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) | ||||||
|  |  | ||||||
|  | @ -831,6 +831,29 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { | ||||||
| 		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers | 		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers | ||||||
| 			base.WarnfAt(v.Pos, "generated nil check") | 			base.WarnfAt(v.Pos, "generated nil check") | ||||||
| 		} | 		} | ||||||
|  | 	case ssa.Op386LoweredCtz32: | ||||||
|  | 		// BSFL in, out | ||||||
|  | 		p := s.Prog(x86.ABSFL) | ||||||
|  | 		p.From.Type = obj.TYPE_REG | ||||||
|  | 		p.From.Reg = v.Args[0].Reg() | ||||||
|  | 		p.To.Type = obj.TYPE_REG | ||||||
|  | 		p.To.Reg = v.Reg() | ||||||
|  | 
 | ||||||
|  | 		// JNZ 2(PC) | ||||||
|  | 		p1 := s.Prog(x86.AJNE) | ||||||
|  | 		p1.To.Type = obj.TYPE_BRANCH | ||||||
|  | 
 | ||||||
|  | 		// MOVL $32, out | ||||||
|  | 		p2 := s.Prog(x86.AMOVL) | ||||||
|  | 		p2.From.Type = obj.TYPE_CONST | ||||||
|  | 		p2.From.Offset = 32 | ||||||
|  | 		p2.To.Type = obj.TYPE_REG | ||||||
|  | 		p2.To.Reg = v.Reg() | ||||||
|  | 
 | ||||||
|  | 		// NOP (so the JNZ has somewhere to land) | ||||||
|  | 		nop := s.Prog(obj.ANOP) | ||||||
|  | 		p1.To.SetTarget(nop) | ||||||
|  | 
 | ||||||
| 	case ssa.OpClobber: | 	case ssa.OpClobber: | ||||||
| 		p := s.Prog(x86.AMOVL) | 		p := s.Prog(x86.AMOVL) | ||||||
| 		p.From.Type = obj.TYPE_CONST | 		p.From.Type = obj.TYPE_CONST | ||||||
|  |  | ||||||
|  | @ -293,6 +293,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 { | ||||||
| func TrailingZeros(n uint) int { | func TrailingZeros(n uint) int { | ||||||
| 	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" | 	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" | ||||||
| 	// amd64/v3:"TZCNTQ" | 	// amd64/v3:"TZCNTQ" | ||||||
|  | 	// 386:"BSFL" | ||||||
| 	// arm:"CLZ" | 	// arm:"CLZ" | ||||||
| 	// arm64:"RBIT","CLZ" | 	// arm64:"RBIT","CLZ" | ||||||
| 	// s390x:"FLOGR" | 	// s390x:"FLOGR" | ||||||
|  | @ -305,6 +306,7 @@ func TrailingZeros(n uint) int { | ||||||
| func TrailingZeros64(n uint64) int { | func TrailingZeros64(n uint64) int { | ||||||
| 	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" | 	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" | ||||||
| 	// amd64/v3:"TZCNTQ" | 	// amd64/v3:"TZCNTQ" | ||||||
|  | 	// 386:"BSFL" | ||||||
| 	// arm64:"RBIT","CLZ" | 	// arm64:"RBIT","CLZ" | ||||||
| 	// s390x:"FLOGR" | 	// s390x:"FLOGR" | ||||||
| 	// ppc64x/power8:"ANDN","POPCNTD" | 	// ppc64x/power8:"ANDN","POPCNTD" | ||||||
|  | @ -322,6 +324,7 @@ func TrailingZeros64Subtract(n uint64) int { | ||||||
| func TrailingZeros32(n uint32) int { | func TrailingZeros32(n uint32) int { | ||||||
| 	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ" | 	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ" | ||||||
| 	// amd64/v3:"TZCNTL" | 	// amd64/v3:"TZCNTL" | ||||||
|  | 	// 386:"BSFL" | ||||||
| 	// arm:"CLZ" | 	// arm:"CLZ" | ||||||
| 	// arm64:"RBITW","CLZW" | 	// arm64:"RBITW","CLZW" | ||||||
| 	// s390x:"FLOGR","MOVWZ" | 	// s390x:"FLOGR","MOVWZ" | ||||||
|  | @ -345,6 +348,7 @@ func TrailingZeros16(n uint16) int { | ||||||
| 
 | 
 | ||||||
| func TrailingZeros8(n uint8) int { | func TrailingZeros8(n uint8) int { | ||||||
| 	// amd64:"BSFL","BTSL\\t\\$8" | 	// amd64:"BSFL","BTSL\\t\\$8" | ||||||
|  | 	// 386:"BSFL" | ||||||
| 	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" | 	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" | ||||||
| 	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" | 	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" | ||||||
| 	// s390x:"FLOGR","OR\t\\$256" | 	// s390x:"FLOGR","OR\t\\$256" | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Wayne Zuo
						Wayne Zuo