| 
									
										
										
										
											2018-02-27 01:59:58 +01:00
										 |  |  | // asmcheck | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-02 21:06:09 +01:00
										 |  |  | // Copyright 2018 The Go Authors. All rights reserved. | 
					
						
							|  |  |  | // Use of this source code is governed by a BSD-style | 
					
						
							|  |  |  | // license that can be found in the LICENSE file. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-02-27 01:59:58 +01:00
										 |  |  | package codegen | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
        if t.wall&hasMonotonic != 0 {
  0x1073465               488b08                  MOVQ 0(AX), CX
  0x1073468               4889ca                  MOVQ CX, DX
  0x107346b               48c1e93f                SHRQ $0x3f, CX
  0x107346f               48c1e13f                SHLQ $0x3f, CX
  0x1073473               48f7c1ffffffff          TESTQ $-0x1, CX
  0x107347a               746b                    JE 0x10734e7
        if t.wall&hasMonotonic != 0 {
  0x1073435               488b08                  MOVQ 0(AX), CX
  0x1073438               480fbae13f              BTQ $0x3f, CX
  0x107343d               7363                    JAE 0x10734a2
Another example:
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x10734c8               4881e1ffffff3f          ANDQ $0x3fffffff, CX
  0x10734cf               48c1e61e                SHLQ $0x1e, SI
  0x10734d3               4809ce                  ORQ CX, SI
  0x10734d6               48b90000000000000080    MOVQ $0x8000000000000000, CX
  0x10734e0               4809f1                  ORQ SI, CX
  0x10734e3               488908                  MOVQ CX, 0(AX)
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x107348b		4881e2ffffff3f		ANDQ $0x3fffffff, DX
  0x1073492		48c1e61e		SHLQ $0x1e, SI
  0x1073496		4809f2			ORQ SI, DX
  0x1073499		480fbaea3f		BTSQ $0x3f, DX
  0x107349e		488910			MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name                     old time/op    new time/op     delta
BinaryTree17-4              2.64s ± 4%      2.56s ± 9%  -2.92%  (p=0.008 n=9+9)
Fannkuch11-4                2.90s ± 1%      2.95s ± 3%  +1.76%  (p=0.010 n=10+9)
FmtFprintfEmpty-4          35.3ns ± 1%     34.5ns ± 2%  -2.34%  (p=0.004 n=9+8)
FmtFprintfString-4         57.0ns ± 1%     58.4ns ± 5%  +2.52%  (p=0.029 n=9+10)
FmtFprintfInt-4            59.8ns ± 3%     59.8ns ± 6%    ~     (p=0.565 n=10+10)
FmtFprintfIntInt-4         93.9ns ± 3%     91.2ns ± 5%  -2.94%  (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4     107ns ± 6%      104ns ± 6%    ~     (p=0.099 n=10+10)
FmtFprintfFloat-4           187ns ± 3%      188ns ± 3%    ~     (p=0.505 n=10+9)
FmtManyArgs-4               410ns ± 1%      415ns ± 6%    ~     (p=0.649 n=8+10)
GobDecode-4                5.30ms ± 3%     5.27ms ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4                4.62ms ± 5%     4.47ms ± 2%  -3.24%  (p=0.001 n=9+10)
Gzip-4                      197ms ± 4%      193ms ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                   30.4ms ± 3%     30.1ms ± 3%    ~     (p=0.481 n=10+10)
HTTPClientServer-4         76.3µs ± 1%     76.0µs ± 1%    ~     (p=0.236 n=8+9)
JSONEncode-4               10.5ms ± 9%     10.3ms ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4               42.3ms ±10%     41.3ms ± 2%    ~     (p=0.053 n=9+10)
Mandelbrot200-4            3.80ms ± 2%     3.72ms ± 2%  -2.15%  (p=0.001 n=9+10)
GoParse-4                  2.88ms ±10%     2.81ms ± 2%    ~     (p=0.247 n=10+10)
RegexpMatchEasy0_32-4      69.5ns ± 4%     68.6ns ± 2%    ~     (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4       165ns ± 3%      162ns ± 3%    ~     (p=0.137 n=10+10)
RegexpMatchEasy1_32-4      65.7ns ± 6%     64.4ns ± 2%  -2.02%  (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4       278ns ± 2%      279ns ± 3%    ~     (p=0.991 n=8+9)
RegexpMatchMedium_32-4     99.3ns ± 3%     98.5ns ± 4%    ~     (p=0.457 n=10+9)
RegexpMatchMedium_1K-4     30.1µs ± 1%     30.4µs ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4       1.40µs ± 2%     1.41µs ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4       42.5µs ± 1%     41.5µs ± 3%  -2.13%  (p=0.002 n=8+9)
Revcomp-4                   332ms ± 4%      328ms ± 5%    ~     (p=0.720 n=9+10)
Template-4                 48.3ms ± 2%     49.6ms ± 3%  +2.56%  (p=0.002 n=8+10)
TimeParse-4                 252ns ± 2%      249ns ± 3%    ~     (p=0.116 n=9+10)
TimeFormat-4                262ns ± 4%      252ns ± 3%  -4.01%  (p=0.000 n=9+10)
name                     old speed      new speed       delta
GobDecode-4               145MB/s ± 3%    146MB/s ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4               166MB/s ± 5%    172MB/s ± 2%  +3.28%  (p=0.001 n=9+10)
Gzip-4                   98.6MB/s ± 4%  100.4MB/s ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                  639MB/s ± 3%    645MB/s ± 3%    ~     (p=0.481 n=10+10)
JSONEncode-4              185MB/s ± 8%    189MB/s ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4             46.0MB/s ± 9%   47.0MB/s ± 2%  +2.21%  (p=0.046 n=9+10)
GoParse-4                20.1MB/s ± 9%   20.6MB/s ± 2%    ~     (p=0.239 n=10+10)
RegexpMatchEasy0_32-4     460MB/s ± 4%    467MB/s ± 2%    ~     (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4    6.19GB/s ± 3%   6.28GB/s ± 3%    ~     (p=0.165 n=10+10)
RegexpMatchEasy1_32-4     487MB/s ± 5%    497MB/s ± 2%  +2.00%  (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4    3.67GB/s ± 2%   3.67GB/s ± 3%    ~     (p=0.963 n=8+9)
RegexpMatchMedium_32-4   10.1MB/s ± 3%   10.1MB/s ± 4%    ~     (p=0.435 n=10+9)
RegexpMatchMedium_1K-4   34.0MB/s ± 1%   33.7MB/s ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4     22.9MB/s ± 2%   22.7MB/s ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4     24.0MB/s ± 3%   24.7MB/s ± 3%  +2.64%  (p=0.001 n=9+9)
Revcomp-4                 766MB/s ± 4%    775MB/s ± 5%    ~     (p=0.720 n=9+10)
Template-4               40.2MB/s ± 2%   39.2MB/s ± 3%  -2.47%  (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
											
										 
											2018-02-17 13:54:03 +01:00
										 |  |  | /************************************ | 
					
						
							|  |  |  |  * 64-bit instructions | 
					
						
							|  |  |  |  ************************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck64_constleft returns 1 when bit 63, bit 60 or bit 0 of a is
							// set, and 0 otherwise. Every test masks a with a constant left shift
							// of 1; the amd64 annotation above each test names the bit-test
							// instruction and bit index expected for it.
							func bitcheck64_constleft(a uint64) (n int) {
								// amd64:"BTQ\t[$]63"
								if a&(1<<63) != 0 {
									return 1
								}
								// amd64:"BTQ\t[$]60"
								if a&(1<<60) != 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if a&(1<<0) != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck64_constright returns 1 as soon as one of its per-element bit
							// tests fires, and 0 when none does. The tests run in this order.
							//
							//	a[0] bit 63 set
							//	a[1] bit 63 set
							//	a[2] bit 63 clear
							//	a[3] bit 60 clear
							//	a[4] bit 1 clear
							//	a[5] bit 0 clear
							//	a[6] bit 7 clear (written as a shift by 5 masked with 4)
							//
							// a[7] is never read. Every test is spelled with a constant right
							// shift; the amd64 annotations name the bit-test instruction expected.
							func bitcheck64_constright(a [8]uint64) (n int) {
								// amd64:"BTQ\t[$]63"
								if (a[0]>>63)&1 != 0 {
									return 1
								}
								// amd64:"BTQ\t[$]63"
								if a[1]>>63 != 0 {
									return 1
								}
								// amd64:"BTQ\t[$]63"
								if a[2]>>63 == 0 {
									return 1
								}
								// amd64:"BTQ\t[$]60"
								if (a[3]>>60)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]1"
								if (a[4]>>1)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if (a[5]>>0)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]7"
								if (a[6]>>5)&4 == 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck64_var returns 1 when bit (b mod 64) of a is set, or when bit
							// (a mod 64) of b is set, and 0 otherwise. The first test uses a
							// variable left shift of 1 and the second a variable right shift; the
							// amd64 annotations expect BTQ for both and, for the second, no bit
							// test against the constant index 0.
							func bitcheck64_var(a, b uint64) (n int) {
								// amd64:"BTQ"
								if a&(1<<(b&63)) != 0 {
									return 1
								}
								// amd64:"BTQ",-"BT.\t[$]0"
								if (b>>(a&63))&1 != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck64_mask returns 1 when bit 63, bit 59 or bit 0 of a is set,
							// and 0 otherwise. The masks are written as hexadecimal literals rather
							// than shifts; the amd64 annotations name the bit-test instruction and
							// bit index expected for each.
							func bitcheck64_mask(a uint64) (n int) {
								// amd64:"BTQ\t[$]63"
								if a&0x8000000000000000 != 0 {
									return 1
								}
								// amd64:"BTQ\t[$]59"
								if a&0x800000000000000 != 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if a&0x1 != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// biton64 returns the wrapping sum of four values, each with one bit
							// forced on. They are b with bit (a mod 64), then a with bit 63, a with
							// bit 60 and a with bit 0. The amd64 annotations expect BTSQ for the
							// first three and a plain ORQ with 1 for bit 0.
							func biton64(a, b uint64) (n uint64) {
								// amd64:"BTSQ"
								n += b | (1 << (a & 63))

								// amd64:"BTSQ\t[$]63"
								n += a | (1 << 63)

								// amd64:"BTSQ\t[$]60"
								n += a | (1 << 60)

								// amd64:"ORQ\t[$]1"
								n += a | (1 << 0)

								return n
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitoff64 returns the wrapping sum of four values, each with one bit
							// forced off. They are b without bit (a mod 64), then a without bit 63,
							// a without bit 60 and a without bit 0. The amd64 annotations expect
							// BTRQ for the first three and a plain ANDQ with -2 for bit 0.
							func bitoff64(a, b uint64) (n uint64) {
								// amd64:"BTRQ"
								n += b &^ (1 << (a & 63))

								// amd64:"BTRQ\t[$]63"
								n += a &^ (1 << 63)

								// amd64:"BTRQ\t[$]60"
								n += a &^ (1 << 60)

								// amd64:"ANDQ\t[$]-2"
								n += a &^ (1 << 0)

								return n
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcompl64 returns the wrapping sum of four values, each with one bit
							// flipped. They are b with bit (a mod 64) flipped, then a with bit 63,
							// bit 60 and bit 0 flipped in turn. The amd64 annotations expect BTCQ
							// for the first three and a plain XORQ with 1 for bit 0.
							func bitcompl64(a, b uint64) (n uint64) {
								// amd64:"BTCQ"
								n += b ^ (1 << (a & 63))

								// amd64:"BTCQ\t[$]63"
								n += a ^ (1 << 63)

								// amd64:"BTCQ\t[$]60"
								n += a ^ (1 << 60)

								// amd64:"XORQ\t[$]1"
								n += a ^ (1 << 0)

								return n
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /************************************ | 
					
						
							|  |  |  |  * 32-bit instructions | 
					
						
							|  |  |  |  ************************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck32_constleft returns 1 when bit 31, bit 28 or bit 0 of a is
							// set, and 0 otherwise. Every test masks a with a constant left shift
							// of 1; the amd64 annotations expect BTL with the matching bit index.
							func bitcheck32_constleft(a uint32) (n int) {
								// amd64:"BTL\t[$]31"
								if a&(1<<31) != 0 {
									return 1
								}
								// amd64:"BTL\t[$]28"
								if a&(1<<28) != 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if a&(1<<0) != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck32_constright returns 1 as soon as one of its per-element bit
							// tests fires, and 0 when none does. The tests run in this order.
							//
							//	a[0] bit 31 set
							//	a[1] bit 31 set
							//	a[2] bit 31 clear
							//	a[3] bit 28 clear
							//	a[4] bit 1 clear
							//	a[5] bit 0 clear
							//	a[6] bit 7 clear (written as a shift by 5 masked with 4)
							//
							// a[7] is never read. Every test is spelled with a constant right
							// shift; the amd64 annotations expect BTL with the matching bit index.
							func bitcheck32_constright(a [8]uint32) (n int) {
								// amd64:"BTL\t[$]31"
								if (a[0]>>31)&1 != 0 {
									return 1
								}
								// amd64:"BTL\t[$]31"
								if a[1]>>31 != 0 {
									return 1
								}
								// amd64:"BTL\t[$]31"
								if a[2]>>31 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]28"
								if (a[3]>>28)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]1"
								if (a[4]>>1)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if (a[5]>>0)&1 == 0 {
									return 1
								}
								// amd64:"BTL\t[$]7"
								if (a[6]>>5)&4 == 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck32_var returns 1 when bit (b mod 32) of a is set, or when bit
							// (a mod 32) of b is set, and 0 otherwise. The first test uses a
							// variable left shift of 1 and the second a variable right shift; the
							// amd64 annotations expect BTL for both and, for the second, no bit
							// test against the constant index 0.
							func bitcheck32_var(a, b uint32) (n int) {
								// amd64:"BTL"
								if a&(1<<(b&31)) != 0 {
									return 1
								}
								// amd64:"BTL",-"BT.\t[$]0"
								if (b>>(a&31))&1 != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcheck32_mask returns 1 when bit 31, bit 27 or bit 0 of a is set,
							// and 0 otherwise. The masks are written as hexadecimal literals rather
							// than shifts; the amd64 annotations expect BTL with the matching bit
							// index.
							func bitcheck32_mask(a uint32) (n int) {
								// amd64:"BTL\t[$]31"
								if a&0x80000000 != 0 {
									return 1
								}
								// amd64:"BTL\t[$]27"
								if a&0x8000000 != 0 {
									return 1
								}
								// amd64:"BTL\t[$]0"
								if a&0x1 != 0 {
									return 1
								}
								return 0
							}
					
						
							|  |  |  | 
 | 
					
						
							// biton32 returns the wrapping sum of four values, each with one bit
							// forced on. They are b with bit (a mod 32), then a with bit 31, a with
							// bit 28 and a with bit 0. The amd64 annotations expect BTSL for the
							// first three and a plain ORL with 1 for bit 0.
							func biton32(a, b uint32) (n uint32) {
								// amd64:"BTSL"
								n += b | (1 << (a & 31))

								// amd64:"BTSL\t[$]31"
								n += a | (1 << 31)

								// amd64:"BTSL\t[$]28"
								n += a | (1 << 28)

								// amd64:"ORL\t[$]1"
								n += a | (1 << 0)

								return n
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitoff32 returns the wrapping sum of four values, each with one bit
							// forced off. They are b without bit (a mod 32), then a without bit 31,
							// a without bit 28 and a without bit 0. The amd64 annotations expect
							// BTRL for the first three and a plain ANDL with -2 for bit 0.
							func bitoff32(a, b uint32) (n uint32) {
								// amd64:"BTRL"
								n += b &^ (1 << (a & 31))

								// amd64:"BTRL\t[$]31"
								n += a &^ (1 << 31)

								// amd64:"BTRL\t[$]28"
								n += a &^ (1 << 28)

								// amd64:"ANDL\t[$]-2"
								n += a &^ (1 << 0)

								return n
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitcompl32 returns the wrapping sum of four values, each with one bit
							// flipped. They are b with bit (a mod 32) flipped, then a with bit 31,
							// bit 28 and bit 0 flipped in turn. The amd64 annotations expect BTCL
							// for the first three and a plain XORL with 1 for bit 0.
							func bitcompl32(a, b uint32) (n uint32) {
								// amd64:"BTCL"
								n += b ^ (1 << (a & 31))

								// amd64:"BTCL\t[$]31"
								n += a ^ (1 << 31)

								// amd64:"BTCL\t[$]28"
								n += a ^ (1 << 28)

								// amd64:"XORL\t[$]1"
								n += a ^ (1 << 0)

								return n
							}
					
						
							| 
									
										
										
										
											2018-04-10 11:20:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-27 02:46:17 +00:00
										 |  |  | // check direct operation on memory with constant source | 
					
						
							// bitOpOnMem updates the first six elements of a in place, each with a
							// constant operand, so a must hold at least six elements. Elements 0 to
							// 2 are combined with multi-bit constants using AND, OR and XOR. The
							// constants for elements 3 to 5 touch a single bit each (clear bit 15,
							// set bit 14, flip bit 13); the amd64 annotations expect BTRL, BTSL and
							// BTCL on the memory operand there instead of ANDL, ORL and XORL.
							func bitOpOnMem(a []uint32) {
								// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
								a[0] &= 200
								// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
								a[1] |= 220
								// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
								a[2] ^= 240
								// amd64:`BTRL\s[$]15,\s12\([A-Z]+\)`,-`ANDL`
								a[3] &= 0xffff7fff
								// amd64:`BTSL\s[$]14,\s16\([A-Z]+\)`,-`ORL`
								a[4] |= 0x4000
								// amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL`
								a[5] ^= 0x2000
							}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-20 14:43:30 -07:00
										 // bitcheckMostNegative reports whether the top bit (0x80) of b is set.
										 // The amd64 annotation expects a TESTB instruction for the comparison.
										 func bitcheckMostNegative(b uint8) bool {
										 	// amd64:"TESTB"
										 	return b&0x80 == 0x80
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-10 11:20:20 +02:00
										 |  |  | // Check AND masking on arm64 (Issue #19857) | 
					
						
							|  |  |  | 
 | 
					
						
							// and_mask_1 returns a with bit 63 cleared, by masking with the constant
							// (1<<63)-1. The arm64 annotation expects an AND instruction.
							func and_mask_1(a uint64) uint64 {
								// arm64:`AND\t`
								return a & ((1 << 63) - 1)
							}
					
						
							|  |  |  | 
 | 
					
						
							// and_mask_2 returns only bit 63 of a, by masking with the constant
							// 1<<63. The arm64 annotation expects an AND instruction.
							func and_mask_2(a uint64) uint64 {
								// arm64:`AND\t`
								return a & (1 << 63)
							}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-10 08:29:52 +00:00
										 // and_mask_3 masks its two arguments with constants and returns both
										 // results. a keeps only the bits present in 0xffffaaaa, and b keeps only
										 // those in 0xffc003ff, which clears bits 10 through 21. The arm/7
										 // annotations expect BIC (and no AND) for the first mask, and BFC (with
										 // neither AND nor BIC) for the second.
										 func and_mask_3(a, b uint32) (uint32, uint32) {
										 	// arm/7:`BIC`,-`AND`
										 	a &= 0xffffaaaa
										 	// arm/7:`BFC`,-`AND`,-`BIC`
										 	b &= 0xffc003ff
										 	return a, b
										 }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-10 11:20:20 +02:00
										 |  |  | // Check generation of arm64 BIC/EON/ORN instructions | 
					
						
							|  |  |  | 
 | 
					
						
							// op_bic returns x with every bit that is set in y cleared (x AND NOT y).
							// The arm64 annotation expects a BIC instruction and no AND.
							func op_bic(x, y uint32) uint32 {
								// arm64:`BIC\t`,-`AND`
								return x &^ y
							}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-05 03:53:53 +00:00
										 // op_eon exercises four spellings of exclusive-or with a complemented
										 // operand. It stores three 32-bit results into a[0], a[1] and a[2], so a
										 // must hold at least three elements, and returns the 64-bit result
										 // n XOR (NOT m). The arm64 annotations expect an EON instruction for
										 // every form.
										 //
										 // NOTE(review) the a[2] annotation excludes XOR while its siblings
										 // exclude EOR and MVN; confirm that difference is intended.
										 func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
										 	// arm64:`EON\t`,-`EOR`,-`MVN`
										 	a[0] = x ^ (y ^ 0xffffffff)

										 	// arm64:`EON\t`,-`EOR`,-`MVN`
										 	a[1] = ^(y ^ z)

										 	// arm64:`EON\t`,-`XOR`
										 	a[2] = x ^ ^z

										 	// arm64:`EON\t`,-`EOR`,-`MVN`
										 	return n ^ (m ^ 0xffffffffffffffff)
										 }
					
						
							|  |  |  | 
 | 
					
						
							// op_orn returns x OR-ed with the bitwise complement of y. The arm64
							// annotation expects an ORN instruction and no ORR.
							func op_orn(x, y uint32) uint32 {
								// arm64:`ORN\t`,-`ORR`
								return x | ^y
							}
					
						
							| 
									
										
										
										
											2019-05-08 17:02:23 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | // check bitsets | 
					
						
							// bitSetPowerOf2Test reports whether bit 3 of x is set, written as a
							// mask-and-compare against the same single-bit constant 8. The amd64
							// annotation expects a BTL on bit 3.
							func bitSetPowerOf2Test(x int) bool {
								// amd64:"BTL\t[$]3"
								return x&8 == 8
							}
					
						
							|  |  |  | 
 | 
					
						
							// bitSetTest reports whether bits 0 and 3 of x are both set, written as
							// a mask-and-compare against the two-bit constant 9. The amd64
							// annotations expect an ANDQ with 9 and a CMPQ against 9.
							func bitSetTest(x int) bool {
								// amd64:"ANDQ\t[$]9, AX"
								// amd64:"CMPQ\tAX, [$]9"
								return x&9 == 9
							}
					
						
							| 
									
										
										
										
											2020-05-11 09:44:48 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | // mask contiguous one bits | 
					
						
							// cont1Mask64U returns x masked with 0x0000ffffffff0000, keeping only
							// bits 16 through 47. The s390x annotation expects a RISBGZ instruction
							// with operands 16, 47 and 0.
							func cont1Mask64U(x uint64) uint64 {
								// s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
								return x & 0x0000ffffffff0000
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // mask contiguous zero bits | 
					
						
							// cont0Mask64U returns x masked with 0xffff00000000ffff, clearing bits
							// 16 through 47. The s390x annotation expects a RISBGZ instruction with
							// operands 48, 15 and 0.
							func cont0Mask64U(x uint64) uint64 {
								// s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
								return x & 0xffff00000000ffff
							}
					
						
							| 
									
										
										
										
											2021-02-12 15:55:25 -08:00
										 |  |  | 
 | 
					
						
							// issue44228a reports whether bit i is set in a, treating a as a bitmap
							// of 64-bit words. The word index is i>>6 and the bit within the word
							// is i&63. The amd64 annotation expects a BTQ and no SHL.
							func issue44228a(a []int64, i int) bool {
								// amd64: "BTQ", -"SHL"
								return a[i>>6]&(1<<(i&63)) != 0
							}
					
						
							// issue44228b reports whether bit i is set in a, treating a as a bitmap
							// of 32-bit words. The word index is i>>5 and the bit within the word
							// is i&31. The amd64 annotation expects a BTL and no SHL.
							func issue44228b(a []int32, i int) bool {
								// amd64: "BTL", -"SHL"
								return a[i>>5]&(1<<(i&31)) != 0
							}