mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 16:50:58 +00:00 
			
		
		
		
	cmd/compile: optimize absorbing InvertFlags into Noov comparisons for arm64
Previously (LessThanNoov (InvertFlags x)) is lowered as:
CSET
CSET
BIC
With this CL it's lowered as:
CSET
CSEL
This saves one instruction.
Similarly (GreaterEqualNoov (InvertFlags x)) is now lowered as:
CSET
CSINC
$ benchstat old.bench new.bench
goos: linux
goarch: arm64
                       │  old.bench  │             new.bench              │
                       │   sec/op    │   sec/op     vs base               │
InvertLessThanNoov-160   2.249n ± 2%   2.190n ± 1%  -2.62% (p=0.003 n=10)
Change-Id: Idd8979b7f4fe466e74b1a201c4aba7f1b0cffb0b
Reviewed-on: https://go-review.googlesource.com/c/go/+/526237
Reviewed-by: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
			
			
This commit is contained in:
		
							parent
							
								
									dd881027c3
								
							
						
					
					
						commit
						ace1494d92
					
				
					 4 changed files with 43 additions and 12 deletions
				
			
		|  | @ -1569,8 +1569,8 @@ | ||||||
| (LessEqualF       (InvertFlags x)) => (GreaterEqualF x) | (LessEqualF       (InvertFlags x)) => (GreaterEqualF x) | ||||||
| (GreaterThanF     (InvertFlags x)) => (LessThanF x) | (GreaterThanF     (InvertFlags x)) => (LessThanF x) | ||||||
| (GreaterEqualF    (InvertFlags x)) => (LessEqualF x) | (GreaterEqualF    (InvertFlags x)) => (LessEqualF x) | ||||||
| (LessThanNoov     (InvertFlags x)) => (BIC (GreaterEqualNoov <typ.Bool> x) (Equal <typ.Bool> x)) | (LessThanNoov     (InvertFlags x)) => (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x) | ||||||
| (GreaterEqualNoov (InvertFlags x)) => (OR (LessThanNoov <typ.Bool> x) (Equal <typ.Bool> x)) | (GreaterEqualNoov (InvertFlags x)) => (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x) | ||||||
| 
 | 
 | ||||||
| // Boolean-generating instructions (NOTE: NOT all boolean Values) always | // Boolean-generating instructions (NOTE: NOT all boolean Values) always | ||||||
| // zero upper bit of the register; no need to zero-extend | // zero upper bit of the register; no need to zero-extend | ||||||
|  |  | ||||||
|  | @ -30,3 +30,21 @@ func BenchmarkPhioptPass(b *testing.B) { | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | type Point struct { | ||||||
|  | 	X, Y int | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | //go:noinline | ||||||
|  | func sign(p1, p2, p3 Point) bool { | ||||||
|  | 	return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func BenchmarkInvertLessThanNoov(b *testing.B) { | ||||||
|  | 	p1 := Point{1, 2} | ||||||
|  | 	p2 := Point{2, 3} | ||||||
|  | 	p3 := Point{3, 4} | ||||||
|  | 	for i := 0; i < b.N; i++ { | ||||||
|  | 		sign(p1, p2, p3) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -5974,18 +5974,19 @@ func rewriteValueARM64_OpARM64GreaterEqualNoov(v *Value) bool { | ||||||
| 	b := v.Block | 	b := v.Block | ||||||
| 	typ := &b.Func.Config.Types | 	typ := &b.Func.Config.Types | ||||||
| 	// match: (GreaterEqualNoov (InvertFlags x)) | 	// match: (GreaterEqualNoov (InvertFlags x)) | ||||||
| 	// result: (OR (LessThanNoov <typ.Bool> x) (Equal <typ.Bool> x)) | 	// result: (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x) | ||||||
| 	for { | 	for { | ||||||
| 		if v_0.Op != OpARM64InvertFlags { | 		if v_0.Op != OpARM64InvertFlags { | ||||||
| 			break | 			break | ||||||
| 		} | 		} | ||||||
| 		x := v_0.Args[0] | 		x := v_0.Args[0] | ||||||
| 		v.reset(OpARM64OR) | 		v.reset(OpARM64CSINC) | ||||||
|  | 		v.AuxInt = opToAuxInt(OpARM64NotEqual) | ||||||
| 		v0 := b.NewValue0(v.Pos, OpARM64LessThanNoov, typ.Bool) | 		v0 := b.NewValue0(v.Pos, OpARM64LessThanNoov, typ.Bool) | ||||||
| 		v0.AddArg(x) | 		v0.AddArg(x) | ||||||
| 		v1 := b.NewValue0(v.Pos, OpARM64Equal, typ.Bool) | 		v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) | ||||||
| 		v1.AddArg(x) | 		v1.AuxInt = int64ToAuxInt(0) | ||||||
| 		v.AddArg2(v0, v1) | 		v.AddArg3(v0, v1, x) | ||||||
| 		return true | 		return true | ||||||
| 	} | 	} | ||||||
| 	return false | 	return false | ||||||
|  | @ -6709,18 +6710,17 @@ func rewriteValueARM64_OpARM64LessThanNoov(v *Value) bool { | ||||||
| 	b := v.Block | 	b := v.Block | ||||||
| 	typ := &b.Func.Config.Types | 	typ := &b.Func.Config.Types | ||||||
| 	// match: (LessThanNoov (InvertFlags x)) | 	// match: (LessThanNoov (InvertFlags x)) | ||||||
| 	// result: (BIC (GreaterEqualNoov <typ.Bool> x) (Equal <typ.Bool> x)) | 	// result: (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x) | ||||||
| 	for { | 	for { | ||||||
| 		if v_0.Op != OpARM64InvertFlags { | 		if v_0.Op != OpARM64InvertFlags { | ||||||
| 			break | 			break | ||||||
| 		} | 		} | ||||||
| 		x := v_0.Args[0] | 		x := v_0.Args[0] | ||||||
| 		v.reset(OpARM64BIC) | 		v.reset(OpARM64CSEL0) | ||||||
|  | 		v.AuxInt = opToAuxInt(OpARM64NotEqual) | ||||||
| 		v0 := b.NewValue0(v.Pos, OpARM64GreaterEqualNoov, typ.Bool) | 		v0 := b.NewValue0(v.Pos, OpARM64GreaterEqualNoov, typ.Bool) | ||||||
| 		v0.AddArg(x) | 		v0.AddArg(x) | ||||||
| 		v1 := b.NewValue0(v.Pos, OpARM64Equal, typ.Bool) | 		v.AddArg2(v0, x) | ||||||
| 		v1.AddArg(x) |  | ||||||
| 		v.AddArg2(v0, v1) |  | ||||||
| 		return true | 		return true | ||||||
| 	} | 	} | ||||||
| 	return false | 	return false | ||||||
|  |  | ||||||
|  | @ -788,3 +788,16 @@ func cmp7() { | ||||||
| 	cmp5[string]("") // force instantiation | 	cmp5[string]("") // force instantiation | ||||||
| 	cmp6[string]("") // force instantiation | 	cmp6[string]("") // force instantiation | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | type Point struct { | ||||||
|  | 	X, Y int | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // invertLessThanNoov checks (LessThanNoov (InvertFlags x)) is lowered as | ||||||
|  | // CMP, CSET, CSEL instruction sequence. InvertFlags are only generated under | ||||||
|  | // certain conditions, see canonLessThan, so if the code below does not | ||||||
|  | // generate an InvertFlags OP, this check may fail. | ||||||
|  | func invertLessThanNoov(p1, p2, p3 Point) bool { | ||||||
|  | 	// arm64:`CMP`,`CSET`,`CSEL` | ||||||
|  | 	return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0 | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 eric fang
						eric fang