| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | // asmcheck | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Copyright 2018 The Go Authors. All rights reserved. | 
					
						
							|  |  |  | // Use of this source code is governed by a BSD-style | 
					
						
							|  |  |  | // license that can be found in the LICENSE file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package codegen | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import "math" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var sink64 [8]float64 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func approx(x float64) { | 
					
						
							|  |  |  | 	// s390x:"FIDBR\t[$]6" | 
					
						
							|  |  |  | 	// arm64:"FRINTPD" | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FRIP" | 
					
						
							| 
									
										
										
										
											2018-03-02 16:47:54 -03:00
										 |  |  | 	// ppc64le:"FRIP" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Ceil" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[0] = math.Ceil(x) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// s390x:"FIDBR\t[$]7" | 
					
						
							|  |  |  | 	// arm64:"FRINTMD" | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FRIM" | 
					
						
							| 
									
										
										
										
											2018-03-02 16:47:54 -03:00
										 |  |  | 	// ppc64le:"FRIM" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Floor" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[1] = math.Floor(x) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// s390x:"FIDBR\t[$]1" | 
					
						
							|  |  |  | 	// arm64:"FRINTAD" | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FRIN" | 
					
						
							| 
									
										
										
										
											2018-03-02 16:47:54 -03:00
										 |  |  | 	// ppc64le:"FRIN" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[2] = math.Round(x) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// s390x:"FIDBR\t[$]5" | 
					
						
							|  |  |  | 	// arm64:"FRINTZD" | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FRIZ" | 
					
						
							| 
									
										
										
										
											2018-03-02 16:47:54 -03:00
										 |  |  | 	// ppc64le:"FRIZ" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Trunc" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[3] = math.Trunc(x) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// s390x:"FIDBR\t[$]4" | 
					
						
							| 
									
										
										
										
											2018-05-22 06:58:32 +00:00
										 |  |  | 	// arm64:"FRINTND" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Nearest" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[4] = math.RoundToEven(x) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func sqrt(x float64) float64 { | 
					
						
							|  |  |  | 	// amd64:"SQRTSD" | 
					
						
							| 
									
										
										
										
											2018-04-15 19:00:27 +02:00
										 |  |  | 	// 386/387:"FSQRT" 386/sse2:"SQRTSD" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// arm64:"FSQRTD" | 
					
						
							| 
									
										
										
										
											2018-04-15 19:00:27 +02:00
										 |  |  | 	// arm/7:"SQRTD" | 
					
						
							| 
									
										
										
										
											2018-04-26 15:37:27 +02:00
										 |  |  | 	// mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD" | 
					
						
							|  |  |  | 	// mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Sqrt" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	return math.Sqrt(x) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Check that it's using integer registers | 
					
						
							|  |  |  | func abs(x, y float64) { | 
					
						
							| 
									
										
											  
											
												cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
        if t.wall&hasMonotonic != 0 {
  0x1073465               488b08                  MOVQ 0(AX), CX
  0x1073468               4889ca                  MOVQ CX, DX
  0x107346b               48c1e93f                SHRQ $0x3f, CX
  0x107346f               48c1e13f                SHLQ $0x3f, CX
  0x1073473               48f7c1ffffffff          TESTQ $-0x1, CX
  0x107347a               746b                    JE 0x10734e7
        if t.wall&hasMonotonic != 0 {
  0x1073435               488b08                  MOVQ 0(AX), CX
  0x1073438               480fbae13f              BTQ $0x3f, CX
  0x107343d               7363                    JAE 0x10734a2
Another example:
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x10734c8               4881e1ffffff3f          ANDQ $0x3fffffff, CX
  0x10734cf               48c1e61e                SHLQ $0x1e, SI
  0x10734d3               4809ce                  ORQ CX, SI
  0x10734d6               48b90000000000000080    MOVQ $0x8000000000000000, CX
  0x10734e0               4809f1                  ORQ SI, CX
  0x10734e3               488908                  MOVQ CX, 0(AX)
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x107348b		4881e2ffffff3f		ANDQ $0x3fffffff, DX
  0x1073492		48c1e61e		SHLQ $0x1e, SI
  0x1073496		4809f2			ORQ SI, DX
  0x1073499		480fbaea3f		BTSQ $0x3f, DX
  0x107349e		488910			MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name                     old time/op    new time/op     delta
BinaryTree17-4              2.64s ± 4%      2.56s ± 9%  -2.92%  (p=0.008 n=9+9)
Fannkuch11-4                2.90s ± 1%      2.95s ± 3%  +1.76%  (p=0.010 n=10+9)
FmtFprintfEmpty-4          35.3ns ± 1%     34.5ns ± 2%  -2.34%  (p=0.004 n=9+8)
FmtFprintfString-4         57.0ns ± 1%     58.4ns ± 5%  +2.52%  (p=0.029 n=9+10)
FmtFprintfInt-4            59.8ns ± 3%     59.8ns ± 6%    ~     (p=0.565 n=10+10)
FmtFprintfIntInt-4         93.9ns ± 3%     91.2ns ± 5%  -2.94%  (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4     107ns ± 6%      104ns ± 6%    ~     (p=0.099 n=10+10)
FmtFprintfFloat-4           187ns ± 3%      188ns ± 3%    ~     (p=0.505 n=10+9)
FmtManyArgs-4               410ns ± 1%      415ns ± 6%    ~     (p=0.649 n=8+10)
GobDecode-4                5.30ms ± 3%     5.27ms ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4                4.62ms ± 5%     4.47ms ± 2%  -3.24%  (p=0.001 n=9+10)
Gzip-4                      197ms ± 4%      193ms ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                   30.4ms ± 3%     30.1ms ± 3%    ~     (p=0.481 n=10+10)
HTTPClientServer-4         76.3µs ± 1%     76.0µs ± 1%    ~     (p=0.236 n=8+9)
JSONEncode-4               10.5ms ± 9%     10.3ms ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4               42.3ms ±10%     41.3ms ± 2%    ~     (p=0.053 n=9+10)
Mandelbrot200-4            3.80ms ± 2%     3.72ms ± 2%  -2.15%  (p=0.001 n=9+10)
GoParse-4                  2.88ms ±10%     2.81ms ± 2%    ~     (p=0.247 n=10+10)
RegexpMatchEasy0_32-4      69.5ns ± 4%     68.6ns ± 2%    ~     (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4       165ns ± 3%      162ns ± 3%    ~     (p=0.137 n=10+10)
RegexpMatchEasy1_32-4      65.7ns ± 6%     64.4ns ± 2%  -2.02%  (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4       278ns ± 2%      279ns ± 3%    ~     (p=0.991 n=8+9)
RegexpMatchMedium_32-4     99.3ns ± 3%     98.5ns ± 4%    ~     (p=0.457 n=10+9)
RegexpMatchMedium_1K-4     30.1µs ± 1%     30.4µs ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4       1.40µs ± 2%     1.41µs ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4       42.5µs ± 1%     41.5µs ± 3%  -2.13%  (p=0.002 n=8+9)
Revcomp-4                   332ms ± 4%      328ms ± 5%    ~     (p=0.720 n=9+10)
Template-4                 48.3ms ± 2%     49.6ms ± 3%  +2.56%  (p=0.002 n=8+10)
TimeParse-4                 252ns ± 2%      249ns ± 3%    ~     (p=0.116 n=9+10)
TimeFormat-4                262ns ± 4%      252ns ± 3%  -4.01%  (p=0.000 n=9+10)
name                     old speed      new speed       delta
GobDecode-4               145MB/s ± 3%    146MB/s ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4               166MB/s ± 5%    172MB/s ± 2%  +3.28%  (p=0.001 n=9+10)
Gzip-4                   98.6MB/s ± 4%  100.4MB/s ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                  639MB/s ± 3%    645MB/s ± 3%    ~     (p=0.481 n=10+10)
JSONEncode-4              185MB/s ± 8%    189MB/s ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4             46.0MB/s ± 9%   47.0MB/s ± 2%  +2.21%  (p=0.046 n=9+10)
GoParse-4                20.1MB/s ± 9%   20.6MB/s ± 2%    ~     (p=0.239 n=10+10)
RegexpMatchEasy0_32-4     460MB/s ± 4%    467MB/s ± 2%    ~     (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4    6.19GB/s ± 3%   6.28GB/s ± 3%    ~     (p=0.165 n=10+10)
RegexpMatchEasy1_32-4     487MB/s ± 5%    497MB/s ± 2%  +2.00%  (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4    3.67GB/s ± 2%   3.67GB/s ± 3%    ~     (p=0.963 n=8+9)
RegexpMatchMedium_32-4   10.1MB/s ± 3%   10.1MB/s ± 4%    ~     (p=0.435 n=10+9)
RegexpMatchMedium_1K-4   34.0MB/s ± 1%   33.7MB/s ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4     22.9MB/s ± 2%   22.7MB/s ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4     24.0MB/s ± 3%   24.7MB/s ± 3%  +2.64%  (p=0.001 n=9+9)
Revcomp-4                 766MB/s ± 4%    775MB/s ± 5%    ~     (p=0.720 n=9+10)
Template-4               40.2MB/s ± 2%   39.2MB/s ± 3%  -2.47%  (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
											
										 
											2018-02-17 13:54:03 +01:00
										 |  |  | 	// amd64:"BTRQ\t[$]63" | 
					
						
							| 
									
										
										
										
											2018-05-22 06:58:32 +00:00
										 |  |  | 	// arm64:"FABSD\t" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// s390x:"LPDFR\t",-"MOVD\t"     (no integer load/store) | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FABS\t" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// ppc64le:"FABS\t" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Abs" | 
					
						
							| 
									
										
										
										
											2019-08-02 02:41:59 +00:00
										 |  |  | 	// arm/6:"ABSD\t" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[0] = math.Abs(x) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
        if t.wall&hasMonotonic != 0 {
  0x1073465               488b08                  MOVQ 0(AX), CX
  0x1073468               4889ca                  MOVQ CX, DX
  0x107346b               48c1e93f                SHRQ $0x3f, CX
  0x107346f               48c1e13f                SHLQ $0x3f, CX
  0x1073473               48f7c1ffffffff          TESTQ $-0x1, CX
  0x107347a               746b                    JE 0x10734e7
        if t.wall&hasMonotonic != 0 {
  0x1073435               488b08                  MOVQ 0(AX), CX
  0x1073438               480fbae13f              BTQ $0x3f, CX
  0x107343d               7363                    JAE 0x10734a2
Another example:
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x10734c8               4881e1ffffff3f          ANDQ $0x3fffffff, CX
  0x10734cf               48c1e61e                SHLQ $0x1e, SI
  0x10734d3               4809ce                  ORQ CX, SI
  0x10734d6               48b90000000000000080    MOVQ $0x8000000000000000, CX
  0x10734e0               4809f1                  ORQ SI, CX
  0x10734e3               488908                  MOVQ CX, 0(AX)
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x107348b		4881e2ffffff3f		ANDQ $0x3fffffff, DX
  0x1073492		48c1e61e		SHLQ $0x1e, SI
  0x1073496		4809f2			ORQ SI, DX
  0x1073499		480fbaea3f		BTSQ $0x3f, DX
  0x107349e		488910			MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name                     old time/op    new time/op     delta
BinaryTree17-4              2.64s ± 4%      2.56s ± 9%  -2.92%  (p=0.008 n=9+9)
Fannkuch11-4                2.90s ± 1%      2.95s ± 3%  +1.76%  (p=0.010 n=10+9)
FmtFprintfEmpty-4          35.3ns ± 1%     34.5ns ± 2%  -2.34%  (p=0.004 n=9+8)
FmtFprintfString-4         57.0ns ± 1%     58.4ns ± 5%  +2.52%  (p=0.029 n=9+10)
FmtFprintfInt-4            59.8ns ± 3%     59.8ns ± 6%    ~     (p=0.565 n=10+10)
FmtFprintfIntInt-4         93.9ns ± 3%     91.2ns ± 5%  -2.94%  (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4     107ns ± 6%      104ns ± 6%    ~     (p=0.099 n=10+10)
FmtFprintfFloat-4           187ns ± 3%      188ns ± 3%    ~     (p=0.505 n=10+9)
FmtManyArgs-4               410ns ± 1%      415ns ± 6%    ~     (p=0.649 n=8+10)
GobDecode-4                5.30ms ± 3%     5.27ms ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4                4.62ms ± 5%     4.47ms ± 2%  -3.24%  (p=0.001 n=9+10)
Gzip-4                      197ms ± 4%      193ms ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                   30.4ms ± 3%     30.1ms ± 3%    ~     (p=0.481 n=10+10)
HTTPClientServer-4         76.3µs ± 1%     76.0µs ± 1%    ~     (p=0.236 n=8+9)
JSONEncode-4               10.5ms ± 9%     10.3ms ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4               42.3ms ±10%     41.3ms ± 2%    ~     (p=0.053 n=9+10)
Mandelbrot200-4            3.80ms ± 2%     3.72ms ± 2%  -2.15%  (p=0.001 n=9+10)
GoParse-4                  2.88ms ±10%     2.81ms ± 2%    ~     (p=0.247 n=10+10)
RegexpMatchEasy0_32-4      69.5ns ± 4%     68.6ns ± 2%    ~     (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4       165ns ± 3%      162ns ± 3%    ~     (p=0.137 n=10+10)
RegexpMatchEasy1_32-4      65.7ns ± 6%     64.4ns ± 2%  -2.02%  (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4       278ns ± 2%      279ns ± 3%    ~     (p=0.991 n=8+9)
RegexpMatchMedium_32-4     99.3ns ± 3%     98.5ns ± 4%    ~     (p=0.457 n=10+9)
RegexpMatchMedium_1K-4     30.1µs ± 1%     30.4µs ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4       1.40µs ± 2%     1.41µs ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4       42.5µs ± 1%     41.5µs ± 3%  -2.13%  (p=0.002 n=8+9)
Revcomp-4                   332ms ± 4%      328ms ± 5%    ~     (p=0.720 n=9+10)
Template-4                 48.3ms ± 2%     49.6ms ± 3%  +2.56%  (p=0.002 n=8+10)
TimeParse-4                 252ns ± 2%      249ns ± 3%    ~     (p=0.116 n=9+10)
TimeFormat-4                262ns ± 4%      252ns ± 3%  -4.01%  (p=0.000 n=9+10)
name                     old speed      new speed       delta
GobDecode-4               145MB/s ± 3%    146MB/s ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4               166MB/s ± 5%    172MB/s ± 2%  +3.28%  (p=0.001 n=9+10)
Gzip-4                   98.6MB/s ± 4%  100.4MB/s ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                  639MB/s ± 3%    645MB/s ± 3%    ~     (p=0.481 n=10+10)
JSONEncode-4              185MB/s ± 8%    189MB/s ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4             46.0MB/s ± 9%   47.0MB/s ± 2%  +2.21%  (p=0.046 n=9+10)
GoParse-4                20.1MB/s ± 9%   20.6MB/s ± 2%    ~     (p=0.239 n=10+10)
RegexpMatchEasy0_32-4     460MB/s ± 4%    467MB/s ± 2%    ~     (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4    6.19GB/s ± 3%   6.28GB/s ± 3%    ~     (p=0.165 n=10+10)
RegexpMatchEasy1_32-4     487MB/s ± 5%    497MB/s ± 2%  +2.00%  (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4    3.67GB/s ± 2%   3.67GB/s ± 3%    ~     (p=0.963 n=8+9)
RegexpMatchMedium_32-4   10.1MB/s ± 3%   10.1MB/s ± 4%    ~     (p=0.435 n=10+9)
RegexpMatchMedium_1K-4   34.0MB/s ± 1%   33.7MB/s ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4     22.9MB/s ± 2%   22.7MB/s ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4     24.0MB/s ± 3%   24.7MB/s ± 3%  +2.64%  (p=0.001 n=9+9)
Revcomp-4                 766MB/s ± 4%    775MB/s ± 5%    ~     (p=0.720 n=9+10)
Template-4               40.2MB/s ± 2%   39.2MB/s ± 3%  -2.47%  (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
											
										 
											2018-02-17 13:54:03 +01:00
										 |  |  | 	// amd64:"BTRQ\t[$]63","PXOR"    (TODO: this should be BTSQ) | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store) | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FNABS\t" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// ppc64le:"FNABS\t" | 
					
						
							|  |  |  | 	sink64[1] = -math.Abs(y) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Check that it's using integer registers | 
					
						
							|  |  |  | func abs32(x float32) float32 { | 
					
						
							|  |  |  | 	// s390x:"LPDFR",-"LDEBR",-"LEDBR"     (no float64 conversion) | 
					
						
							|  |  |  | 	return float32(math.Abs(float64(x))) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Check that it's using integer registers | 
					
						
							|  |  |  | func copysign(a, b, c float64) { | 
					
						
							| 
									
										
										
										
											2019-09-09 17:50:35 +02:00
										 |  |  | 	// amd64:"BTRQ\t[$]63","ANDQ","ORQ" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// s390x:"CPSDR",-"MOVD"         (no integer load/store) | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FCPSGN" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// ppc64le:"FCPSGN" | 
					
						
							| 
									
										
										
										
											2019-03-05 01:56:17 +01:00
										 |  |  | 	// wasm:"F64Copysign" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[0] = math.Copysign(a, b) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												cmd/compile: add patterns for bit set/clear/complement on amd64
This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).
Example of code changes from time.(*Time).addSec:
        if t.wall&hasMonotonic != 0 {
  0x1073465               488b08                  MOVQ 0(AX), CX
  0x1073468               4889ca                  MOVQ CX, DX
  0x107346b               48c1e93f                SHRQ $0x3f, CX
  0x107346f               48c1e13f                SHLQ $0x3f, CX
  0x1073473               48f7c1ffffffff          TESTQ $-0x1, CX
  0x107347a               746b                    JE 0x10734e7
        if t.wall&hasMonotonic != 0 {
  0x1073435               488b08                  MOVQ 0(AX), CX
  0x1073438               480fbae13f              BTQ $0x3f, CX
  0x107343d               7363                    JAE 0x10734a2
Another example:
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x10734c8               4881e1ffffff3f          ANDQ $0x3fffffff, CX
  0x10734cf               48c1e61e                SHLQ $0x1e, SI
  0x10734d3               4809ce                  ORQ CX, SI
  0x10734d6               48b90000000000000080    MOVQ $0x8000000000000000, CX
  0x10734e0               4809f1                  ORQ SI, CX
  0x10734e3               488908                  MOVQ CX, 0(AX)
                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x107348b		4881e2ffffff3f		ANDQ $0x3fffffff, DX
  0x1073492		48c1e61e		SHLQ $0x1e, SI
  0x1073496		4809f2			ORQ SI, DX
  0x1073499		480fbaea3f		BTSQ $0x3f, DX
  0x107349e		488910			MOVQ DX, 0(AX)
Go1 benchmarks seem unaffected, and I would be surprised
otherwise:
name                     old time/op    new time/op     delta
BinaryTree17-4              2.64s ± 4%      2.56s ± 9%  -2.92%  (p=0.008 n=9+9)
Fannkuch11-4                2.90s ± 1%      2.95s ± 3%  +1.76%  (p=0.010 n=10+9)
FmtFprintfEmpty-4          35.3ns ± 1%     34.5ns ± 2%  -2.34%  (p=0.004 n=9+8)
FmtFprintfString-4         57.0ns ± 1%     58.4ns ± 5%  +2.52%  (p=0.029 n=9+10)
FmtFprintfInt-4            59.8ns ± 3%     59.8ns ± 6%    ~     (p=0.565 n=10+10)
FmtFprintfIntInt-4         93.9ns ± 3%     91.2ns ± 5%  -2.94%  (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4     107ns ± 6%      104ns ± 6%    ~     (p=0.099 n=10+10)
FmtFprintfFloat-4           187ns ± 3%      188ns ± 3%    ~     (p=0.505 n=10+9)
FmtManyArgs-4               410ns ± 1%      415ns ± 6%    ~     (p=0.649 n=8+10)
GobDecode-4                5.30ms ± 3%     5.27ms ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4                4.62ms ± 5%     4.47ms ± 2%  -3.24%  (p=0.001 n=9+10)
Gzip-4                      197ms ± 4%      193ms ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                   30.4ms ± 3%     30.1ms ± 3%    ~     (p=0.481 n=10+10)
HTTPClientServer-4         76.3µs ± 1%     76.0µs ± 1%    ~     (p=0.236 n=8+9)
JSONEncode-4               10.5ms ± 9%     10.3ms ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4               42.3ms ±10%     41.3ms ± 2%    ~     (p=0.053 n=9+10)
Mandelbrot200-4            3.80ms ± 2%     3.72ms ± 2%  -2.15%  (p=0.001 n=9+10)
GoParse-4                  2.88ms ±10%     2.81ms ± 2%    ~     (p=0.247 n=10+10)
RegexpMatchEasy0_32-4      69.5ns ± 4%     68.6ns ± 2%    ~     (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4       165ns ± 3%      162ns ± 3%    ~     (p=0.137 n=10+10)
RegexpMatchEasy1_32-4      65.7ns ± 6%     64.4ns ± 2%  -2.02%  (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4       278ns ± 2%      279ns ± 3%    ~     (p=0.991 n=8+9)
RegexpMatchMedium_32-4     99.3ns ± 3%     98.5ns ± 4%    ~     (p=0.457 n=10+9)
RegexpMatchMedium_1K-4     30.1µs ± 1%     30.4µs ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4       1.40µs ± 2%     1.41µs ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4       42.5µs ± 1%     41.5µs ± 3%  -2.13%  (p=0.002 n=8+9)
Revcomp-4                   332ms ± 4%      328ms ± 5%    ~     (p=0.720 n=9+10)
Template-4                 48.3ms ± 2%     49.6ms ± 3%  +2.56%  (p=0.002 n=8+10)
TimeParse-4                 252ns ± 2%      249ns ± 3%    ~     (p=0.116 n=9+10)
TimeFormat-4                262ns ± 4%      252ns ± 3%  -4.01%  (p=0.000 n=9+10)
name                     old speed      new speed       delta
GobDecode-4               145MB/s ± 3%    146MB/s ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4               166MB/s ± 5%    172MB/s ± 2%  +3.28%  (p=0.001 n=9+10)
Gzip-4                   98.6MB/s ± 4%  100.4MB/s ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                  639MB/s ± 3%    645MB/s ± 3%    ~     (p=0.481 n=10+10)
JSONEncode-4              185MB/s ± 8%    189MB/s ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4             46.0MB/s ± 9%   47.0MB/s ± 2%  +2.21%  (p=0.046 n=9+10)
GoParse-4                20.1MB/s ± 9%   20.6MB/s ± 2%    ~     (p=0.239 n=10+10)
RegexpMatchEasy0_32-4     460MB/s ± 4%    467MB/s ± 2%    ~     (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4    6.19GB/s ± 3%   6.28GB/s ± 3%    ~     (p=0.165 n=10+10)
RegexpMatchEasy1_32-4     487MB/s ± 5%    497MB/s ± 2%  +2.00%  (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4    3.67GB/s ± 2%   3.67GB/s ± 3%    ~     (p=0.963 n=8+9)
RegexpMatchMedium_32-4   10.1MB/s ± 3%   10.1MB/s ± 4%    ~     (p=0.435 n=10+9)
RegexpMatchMedium_1K-4   34.0MB/s ± 1%   33.7MB/s ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4     22.9MB/s ± 2%   22.7MB/s ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4     24.0MB/s ± 3%   24.7MB/s ± 3%  +2.64%  (p=0.001 n=9+9)
Revcomp-4                 766MB/s ± 4%    775MB/s ± 5%    ~     (p=0.720 n=9+10)
Template-4               40.2MB/s ± 2%   39.2MB/s ± 3%  -2.47%  (p=0.002 n=8+10)
The rules match ~1800 times during all.bash.
Fixes #18943
Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
											
										 
											2018-02-17 13:54:03 +01:00
										 |  |  | 	// amd64:"BTSQ\t[$]63" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store) | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FCPSGN" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// ppc64le:"FCPSGN" | 
					
						
							| 
									
										
										
										
											2018-09-12 01:43:09 +00:00
										 |  |  | 	// arm64:"ORR", -"AND" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	sink64[1] = math.Copysign(c, -1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Like math.Copysign(c, -1), but with integer operations. Useful | 
					
						
							|  |  |  | 	// for platforms that have a copysign opcode to see if it's detected. | 
					
						
							|  |  |  | 	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store) | 
					
						
							|  |  |  | 	sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-09 17:50:35 +02:00
										 |  |  | 	// amd64:"ANDQ","ORQ" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// s390x:"CPSDR\t",-"MOVD\t"     (no integer load/store) | 
					
						
							| 
									
										
										
										
											2018-10-15 12:53:07 -04:00
										 |  |  | 	// ppc64:"FCPSGN" | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
										 |  |  | 	// ppc64le:"FCPSGN" | 
					
						
							|  |  |  | 	sink64[3] = math.Copysign(-1, c) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-29 20:57:33 -04:00
										 |  |  | func fma(x, y, z float64) float64 { | 
					
						
							| 
									
										
										
										
											2018-09-25 03:10:33 -04:00
										 |  |  | 	// amd64:"VFMADD231SD" | 
					
						
							| 
									
										
										
										
											2018-10-15 03:14:57 -04:00
										 |  |  | 	// arm/6:"FMULAD" | 
					
						
							| 
									
										
										
										
											2018-08-29 20:57:33 -04:00
										 |  |  | 	// arm64:"FMADDD" | 
					
						
							|  |  |  | 	// s390x:"FMADD" | 
					
						
							|  |  |  | 	// ppc64:"FMADD" | 
					
						
							|  |  |  | 	// ppc64le:"FMADD" | 
					
						
							| 
									
										
											  
											
												math, cmd/compile: rename Fma to FMA
This API was added for #25819, where it was discussed as math.FMA.
The commit adding it used math.Fma, presumably for consistency
with the rest of the unusual names in package math
(Sincos, Acosh, Erfcinv, Float32bits, etc).
I believe that using an idiomatic Go name is more important here
than consistency with these other names, most of which are historical
baggage from C's standard library.
Early additions like Float32frombits happened before "uppercase for export"
(so they were originally like "float32frombits") and they were not properly
reconsidered when we uppercased the symbols to export them.
That's a mistake we live with.
The names of functions we have added since then, and even a few
that were legacy, are more properly Go-cased, such as IsNaN, IsInf,
and RoundToEven, rather than Isnan, Isinf, and Roundtoeven.
And also constants like MaxFloat32.
For new API, we should keep using proper Go-cased symbols
instead of minimally-upper-cased-C symbols.
So math.FMA, not math.Fma.
This API has not yet been released, so this change does not break
the compatibility promise.
This CL also modifies cmd/compile, since the compiler knows
the name of the function. I could have stopped at changing the
string constants, but it seemed to make more sense to use a
consistent casing everywhere.
Change-Id: I0f6f3407f41e99bfa8239467345c33945088896e
Reviewed-on: https://go-review.googlesource.com/c/go/+/205317
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
											
										 
											2019-11-04 19:43:45 -05:00
										 |  |  | 	return math.FMA(x, y, z) | 
					
						
							| 
									
										
										
										
											2018-08-29 20:57:33 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-03-03 19:17:20 +01:00
// fromFloat64 adds 1 to f64, reinterprets the bits of the sum as a uint64
// with math.Float64bits, and returns that integer plus 1.
//
// The arch-prefixed comments in the body are asmcheck directives (this file
// starts with an asmcheck marker); each one gives an instruction pattern
// expected for the return statement that follows them.
// NOTE(review): the two +1 operations presumably keep the value live in a
// floating-point register before the bit cast and in an integer register
// after it, so a direct register move is observable. Confirm before editing.
func fromFloat64(f64 float64) uint64 {
	// amd64:"MOVQ\tX.*, [^X].*"
	// arm64:"FMOVD\tF.*, R.*"
	// ppc64:"MFVSRD"
	// ppc64le:"MFVSRD"
	return math.Float64bits(f64+1) + 1
}
					
						
							|  |  |  | 
 | 
					
						
// fromFloat32 adds 1 to f32, reinterprets the bits of the sum as a uint32
// with math.Float32bits, and returns that integer plus 1.
//
// The arch-prefixed comments in the body are asmcheck directives giving the
// instruction pattern expected for the return statement. Only amd64 and
// arm64 carry a check here.
// NOTE(review): the surrounding +1 operations presumably force the operand
// into a floating-point register and the result into an integer register.
// Confirm before editing.
func fromFloat32(f32 float32) uint32 {
	// amd64:"MOVL\tX.*, [^X].*"
	// arm64:"FMOVS\tF.*, R.*"
	return math.Float32bits(f32+1) + 1
}
					
						
							|  |  |  | 
 | 
					
						
// toFloat64 adds 1 to u64, reinterprets the resulting bits as a float64 with
// math.Float64frombits, and returns that value plus 1.
//
// The arch-prefixed comments in the body are asmcheck directives giving the
// instruction pattern expected for the return statement (the mirror image of
// the patterns used in fromFloat64: integer source, floating-point
// destination).
// NOTE(review): the +1 operations presumably keep the operand in an integer
// register and the result in a floating-point register. Confirm before
// editing.
func toFloat64(u64 uint64) float64 {
	// amd64:"MOVQ\t[^X].*, X.*"
	// arm64:"FMOVD\tR.*, F.*"
	// ppc64:"MTVSRD"
	// ppc64le:"MTVSRD"
	return math.Float64frombits(u64+1) + 1
}
					
						
							|  |  |  | 
 | 
					
						
// toFloat32 adds 1 to u32, reinterprets the resulting bits as a float32 with
// math.Float32frombits, and returns that value plus 1.
//
// The arch-prefixed comments in the body are asmcheck directives giving the
// instruction pattern expected for the return statement. Only amd64 and
// arm64 carry a check here.
// NOTE(review): the +1 operations presumably keep the operand in an integer
// register and the result in a floating-point register. Confirm before
// editing.
func toFloat32(u32 uint32) float32 {
	// amd64:"MOVL\t[^X].*, X.*"
	// arm64:"FMOVS\tR.*, F.*"
	return math.Float32frombits(u32+1) + 1
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Test that comparisons with constants converted to float | 
					
						
							|  |  |  | // are evaluated at compile-time | 
					
						
							|  |  |  | 
 | 
					
						
// constantCheck64 returns the logical OR of three float64 comparisons whose
// operands are all constants or constant conversions. Each comparison is
// false (0.5 is not 1, 1.5 is not greater than 2^63, and NaN never equals
// NaN), so the result is false.
//
// The asmcheck directives below require a byte move of the immediate 0 and
// forbid both a floating-point compare and a move of the immediate 1 on the
// listed architectures.
func constantCheck64() bool {
	// amd64:"MOVB\t[$]0",-"FCMP",-"MOVB\t[$]1"
	// s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
	return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63)) || math.NaN() == math.NaN()
}
					
						
							|  |  |  | 
 | 
					
						
// constantCheck32 returns the logical AND of three float32 comparisons whose
// operands are all constants or constant conversions. Each comparison is
// true (0.5 is at most 1, 1.5 is at least -2^31, and NaN is never equal to
// NaN), so the result is true.
//
// The asmcheck directives below require a byte move of the immediate 1 and
// forbid both a floating-point compare and a move of the immediate 0 on the
// listed architectures.
func constantCheck32() bool {
	// amd64:"MOVB\t[$]1",-"FCMP",-"MOVB\t[$]0"
	// s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
	return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31)) && float32(math.NaN()) != float32(math.NaN())
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Test that integer constants are converted to floating point constants | 
					
						
							|  |  |  | // at compile-time | 
					
						
							|  |  |  | 
 | 
					
						
// constantConvert32 returns -x when x is greater than
// math.Float32frombits(0x3f800000), and x otherwise. The bit pattern
// 0x3f800000 is the float32 value 1.0 (the arm64 directive spells it that
// way). Because the test is a strict greater-than, a NaN argument takes the
// fall-through path and is returned unchanged.
//
// The asmcheck directives below require the comparison operand to be loaded
// as a float32 constant on each listed architecture; they apply to the if
// statement that follows them.
func constantConvert32(x float32) float32 {
	// amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
	// s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
	// ppc64:"FMOVS\t[$]f32.3f800000\\(SB\\)"
	// ppc64le:"FMOVS\t[$]f32.3f800000\\(SB\\)"
	// arm64:"FMOVS\t[$]\\(1.0\\)"
	if x > math.Float32frombits(0x3f800000) {
		return -x
	}
	return x
}
					
						
							|  |  |  | 
 | 
					
						
// constantConvertInt32 returns -x when x is greater than math.Float32bits(1),
// and x otherwise. x is unsigned, so the negation wraps modulo 2^32.
//
// All asmcheck directives below are negative checks: they forbid a
// single-precision floating-point move on each listed architecture for the
// if statement that follows them.
// NOTE(review): the intent is presumably that math.Float32bits(1) is folded
// to an integer constant at compile time. Confirm against the compiler rules.
func constantConvertInt32(x uint32) uint32 {
	// amd64:-"MOVSS"
	// s390x:-"FMOVS"
	// ppc64:-"FMOVS"
	// ppc64le:-"FMOVS"
	// arm64:-"FMOVS"
	if x > math.Float32bits(1) {
		return -x
	}
	return x
}