// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "strings"
// Notes:
//  - Integer types live in the low portion of registers. Upper portions are junk.
//  - Boolean types use the low-order byte of a register. 0=false, 1=true.
//    Upper bytes are junk.
//  - *const instructions may use a constant larger than the instruction can encode.
//    In this case the assembler expands to multiple instructions and uses tmp
//    register (R27).
//  - All 32-bit Ops will zero the upper 32 bits of the destination register.

// Suffixes encode the bit width of various instructions.
// D (double word) = 64 bit
// W (word)        = 32 bit
// H (half word)   = 16 bit
// HU              = 16 bit unsigned
// B (byte)        = 8 bit
// BU              = 8 bit unsigned
// S (single)      = 32 bit float
// D (double)      = 64 bit float

// Note: registers not used in regalloc are not included in this list,
// so that regmask stays within int64
// Be careful when hand coding regmasks.
// regNamesARM64 is the ordered list of register names. init maps each name
// to its index in this slice and uses that index as the bit position in a
// regMask, so the order is significant and the slice may hold at most 64
// entries (init panics otherwise).
var regNamesARM64 = []string{
	"R0",
	"R1",
	"R2",
	"R3",
	"R4",
	"R5",
	"R6",
	"R7",
	"R8",
	"R9",
	"R10",
	"R11",
	"R12",
	"R13",
	"R14",
	"R15",
	"R16",
	"R17",
	// R18 = platform register, not used
	"R19",
	"R20",
	"R21",
	"R22",
	"R23",
	"R24",
	"R25",
	"R26",
	// R27 = REGTMP not used in regalloc
	"g",    // aka R28
	"R29",  // frame pointer, not used
	"R30",  // aka REGLINK
	"ZERO", // zero register (aka R31)
	"SP",   // stack pointer (aka R31)
	// Note: both ZERO and SP are register number 31!
	// What r31 means in a particular instruction depends on
	// the instruction. Generally, for arguments of instructions
	// which are addresses to load or store from, r31 means SP.
	// In other instructions, r31 means ZERO. But there are
	// exceptions.
	// See https://stackoverflow.com/questions/61532867
	// This does not have much of an effect here, as the
	// cmd/internal/obj/arm64 interface treats them as two
	// different registers and picks the right instruction
	// that encodes what r31 means. But see issue 71651.

	"F0",
	"F1",
	"F2",
	"F3",
	"F4",
	"F5",
	"F6",
	"F7",
	"F8",
	"F9",
	"F10",
	"F11",
	"F12",
	"F13",
	"F14",
	"F15",
	"F16",
	"F17",
	"F18",
	"F19",
	"F20",
	"F21",
	"F22",
	"F23",
	"F24",
	"F25",
	"F26",
	"F27",
	"F28",
	"F29",
	"F30",
	"F31",

	// If you add registers, update asyncPreempt in runtime.

	// pseudo-registers
	"SB",
}
func init ( ) {
// Make map from reg names to reg integers.
if len ( regNamesARM64 ) > 64 {
panic ( "too many registers" )
}
num := map [ string ] int { }
for i , name := range regNamesARM64 {
num [ name ] = i
}
buildReg := func ( s string ) regMask {
m := regMask ( 0 )
for _ , r := range strings . Split ( s , " " ) {
if n , ok := num [ r ] ; ok {
m |= regMask ( 1 ) << uint ( n )
continue
}
panic ( "register " + r + " not found" )
}
return m
}
// Common individual register masks
var (
2016-10-26 16:06:16 -04:00
gp = buildReg ( "R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30" )
2025-04-05 08:34:12 -07:00
gpg = gp | buildReg ( "g" )
gpsp = gp | buildReg ( "SP" )
gpspg = gpg | buildReg ( "SP" )
gpspsbg = gpspg | buildReg ( "SB" )
cmd/compile, runtime, etc: get rid of constant FP registers
On ARM64, MIPS64, and PPC64, some floating point registers were
reserved for constants 0, 1, 2, 0.5, etc. This CL removes them.
On ARM64, they are never used. On MIPS64 and PPC64, the only use
case is a multiplication-by-2 in the old backend of the compiler,
which is replaced with an addition.
Change-Id: I737cbf43283756e3408964fc88c567a938c57036
Reviewed-on: https://go-review.googlesource.com/28095
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-08-30 14:46:25 -04:00
fp = buildReg ( "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31" )
2016-08-04 06:57:34 -04:00
callerSave = gp | fp | buildReg ( "g" ) // runtime.setg (and anything calling it) may clobber g
2025-06-04 21:49:08 -07:00
r24to25 = buildReg ( "R24 R25" )
r23to25 = buildReg ( "R23 R24 R25" )
2025-04-05 08:34:12 -07:00
rz = buildReg ( "ZERO" )
2025-06-18 15:06:55 -07:00
first16 = buildReg ( "R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15" )
2016-07-21 12:42:49 -04:00
)
// Common regInfo
var (
2019-01-14 09:36:18 +00:00
gp01 = regInfo { inputs : nil , outputs : [ ] regMask { gp } }
gp0flags1 = regInfo { inputs : [ ] regMask { 0 } , outputs : [ ] regMask { gp } }
gp11 = regInfo { inputs : [ ] regMask { gpg } , outputs : [ ] regMask { gp } }
gp11sp = regInfo { inputs : [ ] regMask { gpspg } , outputs : [ ] regMask { gp } }
gp1flags = regInfo { inputs : [ ] regMask { gpg } }
2025-08-21 17:41:13 +03:00
gp1flagsflags = regInfo { inputs : [ ] regMask { gpg } }
2019-01-14 09:36:18 +00:00
gp1flags1 = regInfo { inputs : [ ] regMask { gpg } , outputs : [ ] regMask { gp } }
gp11flags = regInfo { inputs : [ ] regMask { gpg } , outputs : [ ] regMask { gp , 0 } }
gp21 = regInfo { inputs : [ ] regMask { gpg , gpg } , outputs : [ ] regMask { gp } }
gp21nog = regInfo { inputs : [ ] regMask { gp , gp } , outputs : [ ] regMask { gp } }
2019-03-21 03:24:47 +00:00
gp21flags = regInfo { inputs : [ ] regMask { gp , gp } , outputs : [ ] regMask { gp , 0 } }
2019-01-14 09:36:18 +00:00
gp2flags = regInfo { inputs : [ ] regMask { gpg , gpg } }
2025-08-21 17:41:13 +03:00
gp2flagsflags = regInfo { inputs : [ ] regMask { gpg , gpg } }
2019-01-14 09:36:18 +00:00
gp2flags1 = regInfo { inputs : [ ] regMask { gp , gp } , outputs : [ ] regMask { gp } }
gp2flags1flags = regInfo { inputs : [ ] regMask { gp , gp , 0 } , outputs : [ ] regMask { gp , 0 } }
2025-04-05 08:34:12 -07:00
gp2load = regInfo { inputs : [ ] regMask { gpspsbg , gpg } , outputs : [ ] regMask { gp } }
2019-01-14 09:36:18 +00:00
gp31 = regInfo { inputs : [ ] regMask { gpg , gpg , gpg } , outputs : [ ] regMask { gp } }
2025-04-05 08:34:12 -07:00
gpload = regInfo { inputs : [ ] regMask { gpspsbg } , outputs : [ ] regMask { gp } }
gpload2 = regInfo { inputs : [ ] regMask { gpspsbg } , outputs : [ ] regMask { gpg , gpg } }
gpstore = regInfo { inputs : [ ] regMask { gpspsbg , gpg | rz } }
gpstore2 = regInfo { inputs : [ ] regMask { gpspsbg , gpg | rz , gpg | rz } }
gpxchg = regInfo { inputs : [ ] regMask { gpspsbg , gpg | rz } , outputs : [ ] regMask { gp } }
gpcas = regInfo { inputs : [ ] regMask { gpspsbg , gpg | rz , gpg | rz } , outputs : [ ] regMask { gp } }
2019-01-14 09:36:18 +00:00
fp01 = regInfo { inputs : nil , outputs : [ ] regMask { fp } }
fp11 = regInfo { inputs : [ ] regMask { fp } , outputs : [ ] regMask { fp } }
fpgp = regInfo { inputs : [ ] regMask { fp } , outputs : [ ] regMask { gp } }
gpfp = regInfo { inputs : [ ] regMask { gp } , outputs : [ ] regMask { fp } }
fp21 = regInfo { inputs : [ ] regMask { fp , fp } , outputs : [ ] regMask { fp } }
fp31 = regInfo { inputs : [ ] regMask { fp , fp , fp } , outputs : [ ] regMask { fp } }
fp2flags = regInfo { inputs : [ ] regMask { fp , fp } }
fp1flags = regInfo { inputs : [ ] regMask { fp } }
2025-04-05 08:34:12 -07:00
fpload = regInfo { inputs : [ ] regMask { gpspsbg } , outputs : [ ] regMask { fp } }
fpload2 = regInfo { inputs : [ ] regMask { gpspsbg } , outputs : [ ] regMask { fp , fp } }
fp2load = regInfo { inputs : [ ] regMask { gpspsbg , gpg } , outputs : [ ] regMask { fp } }
fpstore = regInfo { inputs : [ ] regMask { gpspsbg , fp } }
fpstoreidx = regInfo { inputs : [ ] regMask { gpspsbg , gpg , fp } }
fpstore2 = regInfo { inputs : [ ] regMask { gpspsbg , fp , fp } }
2019-01-14 09:36:18 +00:00
readflags = regInfo { inputs : nil , outputs : [ ] regMask { gp } }
2021-06-15 14:04:30 +00:00
prefreg = regInfo { inputs : [ ] regMask { gpspsbg } }
2016-07-21 12:42:49 -04:00
)
ops := [ ] opData {
// binary ops
2019-01-14 09:36:18 +00:00
{ name : "ADCSflags" , argLength : 3 , reg : gp2flags1flags , typ : "(UInt64,Flags)" , asm : "ADCS" , commutative : true } , // arg0+arg1+carry, set flags.
{ name : "ADCzerocarry" , argLength : 1 , reg : gp0flags1 , typ : "UInt64" , asm : "ADC" } , // ZR+ZR+carry
{ name : "ADD" , argLength : 2 , reg : gp21 , asm : "ADD" , commutative : true } , // arg0 + arg1
{ name : "ADDconst" , argLength : 1 , reg : gp11sp , asm : "ADD" , aux : "Int64" } , // arg0 + auxInt
2019-03-21 03:24:47 +00:00
{ name : "ADDSconstflags" , argLength : 1 , reg : gp11flags , typ : "(UInt64,Flags)" , asm : "ADDS" , aux : "Int64" } , // arg0+auxint, set flags.
{ name : "ADDSflags" , argLength : 2 , reg : gp21flags , typ : "(UInt64,Flags)" , asm : "ADDS" , commutative : true } , // arg0+arg1, set flags.
2019-01-14 09:36:18 +00:00
{ name : "SUB" , argLength : 2 , reg : gp21 , asm : "SUB" } , // arg0 - arg1
{ name : "SUBconst" , argLength : 1 , reg : gp11 , asm : "SUB" , aux : "Int64" } , // arg0 - auxInt
2019-03-20 12:46:20 +00:00
{ name : "SBCSflags" , argLength : 3 , reg : gp2flags1flags , typ : "(UInt64,Flags)" , asm : "SBCS" } , // arg0-(arg1+borrowing), set flags.
{ name : "SUBSflags" , argLength : 2 , reg : gp21flags , typ : "(UInt64,Flags)" , asm : "SUBS" } , // arg0 - arg1, set flags.
2019-01-14 09:36:18 +00:00
{ name : "MUL" , argLength : 2 , reg : gp21 , asm : "MUL" , commutative : true } , // arg0 * arg1
{ name : "MULW" , argLength : 2 , reg : gp21 , asm : "MULW" , commutative : true } , // arg0 * arg1, 32-bit
{ name : "MNEG" , argLength : 2 , reg : gp21 , asm : "MNEG" , commutative : true } , // -arg0 * arg1
{ name : "MNEGW" , argLength : 2 , reg : gp21 , asm : "MNEGW" , commutative : true } , // -arg0 * arg1, 32-bit
{ name : "MULH" , argLength : 2 , reg : gp21 , asm : "SMULH" , commutative : true } , // (arg0 * arg1) >> 64, signed
{ name : "UMULH" , argLength : 2 , reg : gp21 , asm : "UMULH" , commutative : true } , // (arg0 * arg1) >> 64, unsigned
{ name : "MULL" , argLength : 2 , reg : gp21 , asm : "SMULL" , commutative : true } , // arg0 * arg1, signed, 32-bit mult results in 64-bit
{ name : "UMULL" , argLength : 2 , reg : gp21 , asm : "UMULL" , commutative : true } , // arg0 * arg1, unsigned, 32-bit mult results in 64-bit
{ name : "DIV" , argLength : 2 , reg : gp21 , asm : "SDIV" } , // arg0 / arg1, signed
2023-09-03 00:08:17 +08:00
{ name : "UDIV" , argLength : 2 , reg : gp21 , asm : "UDIV" } , // arg0 / arg1, unsigned
2019-01-14 09:36:18 +00:00
{ name : "DIVW" , argLength : 2 , reg : gp21 , asm : "SDIVW" } , // arg0 / arg1, signed, 32 bit
2023-09-03 00:08:17 +08:00
{ name : "UDIVW" , argLength : 2 , reg : gp21 , asm : "UDIVW" } , // arg0 / arg1, unsigned, 32 bit
2019-01-14 09:36:18 +00:00
{ name : "MOD" , argLength : 2 , reg : gp21 , asm : "REM" } , // arg0 % arg1, signed
{ name : "UMOD" , argLength : 2 , reg : gp21 , asm : "UREM" } , // arg0 % arg1, unsigned
{ name : "MODW" , argLength : 2 , reg : gp21 , asm : "REMW" } , // arg0 % arg1, signed, 32 bit
{ name : "UMODW" , argLength : 2 , reg : gp21 , asm : "UREMW" } , // arg0 % arg1, unsigned, 32 bit
2016-07-21 12:42:49 -04:00
2018-02-07 12:24:41 +00:00
{ name : "FADDS" , argLength : 2 , reg : fp21 , asm : "FADDS" , commutative : true } , // arg0 + arg1
{ name : "FADDD" , argLength : 2 , reg : fp21 , asm : "FADDD" , commutative : true } , // arg0 + arg1
{ name : "FSUBS" , argLength : 2 , reg : fp21 , asm : "FSUBS" } , // arg0 - arg1
{ name : "FSUBD" , argLength : 2 , reg : fp21 , asm : "FSUBD" } , // arg0 - arg1
{ name : "FMULS" , argLength : 2 , reg : fp21 , asm : "FMULS" , commutative : true } , // arg0 * arg1
{ name : "FMULD" , argLength : 2 , reg : fp21 , asm : "FMULD" , commutative : true } , // arg0 * arg1
{ name : "FNMULS" , argLength : 2 , reg : fp21 , asm : "FNMULS" , commutative : true } , // -(arg0 * arg1)
{ name : "FNMULD" , argLength : 2 , reg : fp21 , asm : "FNMULD" , commutative : true } , // -(arg0 * arg1)
{ name : "FDIVS" , argLength : 2 , reg : fp21 , asm : "FDIVS" } , // arg0 / arg1
{ name : "FDIVD" , argLength : 2 , reg : fp21 , asm : "FDIVD" } , // arg0 / arg1
2016-07-21 12:42:49 -04:00
{ name : "AND" , argLength : 2 , reg : gp21 , asm : "AND" , commutative : true } , // arg0 & arg1
2016-07-22 06:41:14 -04:00
{ name : "ANDconst" , argLength : 1 , reg : gp11 , asm : "AND" , aux : "Int64" } , // arg0 & auxInt
2016-07-21 12:42:49 -04:00
{ name : "OR" , argLength : 2 , reg : gp21 , asm : "ORR" , commutative : true } , // arg0 | arg1
2016-07-22 06:41:14 -04:00
{ name : "ORconst" , argLength : 1 , reg : gp11 , asm : "ORR" , aux : "Int64" } , // arg0 | auxInt
2016-07-21 12:42:49 -04:00
{ name : "XOR" , argLength : 2 , reg : gp21 , asm : "EOR" , commutative : true } , // arg0 ^ arg1
2016-07-22 06:41:14 -04:00
{ name : "XORconst" , argLength : 1 , reg : gp11 , asm : "EOR" , aux : "Int64" } , // arg0 ^ auxInt
2016-07-21 12:42:49 -04:00
{ name : "BIC" , argLength : 2 , reg : gp21 , asm : "BIC" } , // arg0 &^ arg1
2018-02-25 09:10:54 +00:00
{ name : "EON" , argLength : 2 , reg : gp21 , asm : "EON" } , // arg0 ^ ^arg1
{ name : "ORN" , argLength : 2 , reg : gp21 , asm : "ORN" } , // arg0 | ^arg1
2016-07-21 12:42:49 -04:00
// unary ops
2019-03-20 12:46:20 +00:00
{ name : "MVN" , argLength : 1 , reg : gp11 , asm : "MVN" } , // ^arg0
{ name : "NEG" , argLength : 1 , reg : gp11 , asm : "NEG" } , // -arg0
{ name : "NEGSflags" , argLength : 1 , reg : gp11flags , typ : "(UInt64,Flags)" , asm : "NEGS" } , // -arg0, set flags.
{ name : "NGCzerocarry" , argLength : 1 , reg : gp0flags1 , typ : "UInt64" , asm : "NGC" } , // -1 if borrowing, 0 otherwise.
{ name : "FABSD" , argLength : 1 , reg : fp11 , asm : "FABSD" } , // abs(arg0), float64
{ name : "FNEGS" , argLength : 1 , reg : fp11 , asm : "FNEGS" } , // -arg0, float32
{ name : "FNEGD" , argLength : 1 , reg : fp11 , asm : "FNEGD" } , // -arg0, float64
{ name : "FSQRTD" , argLength : 1 , reg : fp11 , asm : "FSQRTD" } , // sqrt(arg0), float64
2020-12-07 19:15:15 +08:00
{ name : "FSQRTS" , argLength : 1 , reg : fp11 , asm : "FSQRTS" } , // sqrt(arg0), float32
2023-07-31 14:08:42 -07:00
{ name : "FMIND" , argLength : 2 , reg : fp21 , asm : "FMIND" } , // min(arg0, arg1)
{ name : "FMINS" , argLength : 2 , reg : fp21 , asm : "FMINS" } , // min(arg0, arg1)
{ name : "FMAXD" , argLength : 2 , reg : fp21 , asm : "FMAXD" } , // max(arg0, arg1)
{ name : "FMAXS" , argLength : 2 , reg : fp21 , asm : "FMAXS" } , // max(arg0, arg1)
2019-03-20 12:46:20 +00:00
{ name : "REV" , argLength : 1 , reg : gp11 , asm : "REV" } , // byte reverse, 64-bit
{ name : "REVW" , argLength : 1 , reg : gp11 , asm : "REVW" } , // byte reverse, 32-bit
2020-05-20 08:49:59 +00:00
{ name : "REV16" , argLength : 1 , reg : gp11 , asm : "REV16" } , // byte reverse in each 16-bit halfword, 64-bit
2019-03-20 12:46:20 +00:00
{ name : "REV16W" , argLength : 1 , reg : gp11 , asm : "REV16W" } , // byte reverse in each 16-bit halfword, 32-bit
{ name : "RBIT" , argLength : 1 , reg : gp11 , asm : "RBIT" } , // bit reverse, 64-bit
{ name : "RBITW" , argLength : 1 , reg : gp11 , asm : "RBITW" } , // bit reverse, 32-bit
{ name : "CLZ" , argLength : 1 , reg : gp11 , asm : "CLZ" } , // count leading zero, 64-bit
{ name : "CLZW" , argLength : 1 , reg : gp11 , asm : "CLZW" } , // count leading zero, 32-bit
{ name : "VCNT" , argLength : 1 , reg : fp11 , asm : "VCNT" } , // count set bits for each 8-bit unit and store the result in each 8-bit unit
{ name : "VUADDLV" , argLength : 1 , reg : fp11 , asm : "VUADDLV" } , // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
2018-02-28 16:30:07 -05:00
{ name : "LoweredRound32F" , argLength : 1 , reg : fp11 , resultInArg0 : true , zeroWidth : true } ,
{ name : "LoweredRound64F" , argLength : 1 , reg : fp11 , resultInArg0 : true , zeroWidth : true } ,
2016-07-21 12:42:49 -04:00
2018-02-17 12:57:44 +00:00
// 3-operand, the addend comes first
{ name : "FMADDS" , argLength : 3 , reg : fp31 , asm : "FMADDS" } , // +arg0 + (arg1 * arg2)
{ name : "FMADDD" , argLength : 3 , reg : fp31 , asm : "FMADDD" } , // +arg0 + (arg1 * arg2)
{ name : "FNMADDS" , argLength : 3 , reg : fp31 , asm : "FNMADDS" } , // -arg0 - (arg1 * arg2)
{ name : "FNMADDD" , argLength : 3 , reg : fp31 , asm : "FNMADDD" } , // -arg0 - (arg1 * arg2)
{ name : "FMSUBS" , argLength : 3 , reg : fp31 , asm : "FMSUBS" } , // +arg0 - (arg1 * arg2)
{ name : "FMSUBD" , argLength : 3 , reg : fp31 , asm : "FMSUBD" } , // +arg0 - (arg1 * arg2)
{ name : "FNMSUBS" , argLength : 3 , reg : fp31 , asm : "FNMSUBS" } , // -arg0 + (arg1 * arg2)
{ name : "FNMSUBD" , argLength : 3 , reg : fp31 , asm : "FNMSUBD" } , // -arg0 + (arg1 * arg2)
2018-08-13 10:38:25 +00:00
{ name : "MADD" , argLength : 3 , reg : gp31 , asm : "MADD" } , // +arg0 + (arg1 * arg2)
{ name : "MADDW" , argLength : 3 , reg : gp31 , asm : "MADDW" } , // +arg0 + (arg1 * arg2), 32-bit
{ name : "MSUB" , argLength : 3 , reg : gp31 , asm : "MSUB" } , // +arg0 - (arg1 * arg2)
{ name : "MSUBW" , argLength : 3 , reg : gp31 , asm : "MSUBW" } , // +arg0 - (arg1 * arg2), 32-bit
2018-02-17 12:57:44 +00:00
2016-07-22 06:41:14 -04:00
// shifts
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
{ name : "SLL" , argLength : 2 , reg : gp21 , asm : "LSL" } , // arg0 << arg1, shift amount is mod 64
2021-04-19 10:40:20 +08:00
{ name : "SLLconst" , argLength : 1 , reg : gp11 , asm : "LSL" , aux : "Int64" } , // arg0 << auxInt, auxInt should be in the range 0 to 63.
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
{ name : "SRL" , argLength : 2 , reg : gp21 , asm : "LSR" } , // arg0 >> arg1, unsigned, shift amount is mod 64
2021-04-19 10:40:20 +08:00
{ name : "SRLconst" , argLength : 1 , reg : gp11 , asm : "LSR" , aux : "Int64" } , // arg0 >> auxInt, unsigned, auxInt should be in the range 0 to 63.
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
{ name : "SRA" , argLength : 2 , reg : gp21 , asm : "ASR" } , // arg0 >> arg1, signed, shift amount is mod 64
2021-04-19 10:40:20 +08:00
{ name : "SRAconst" , argLength : 1 , reg : gp11 , asm : "ASR" , aux : "Int64" } , // arg0 >> auxInt, signed, auxInt should be in the range 0 to 63.
2018-06-30 06:48:51 +00:00
{ name : "ROR" , argLength : 2 , reg : gp21 , asm : "ROR" } , // arg0 right rotate by (arg1 mod 64) bits
{ name : "RORW" , argLength : 2 , reg : gp21 , asm : "RORW" } , // arg0 right rotate by (arg1 mod 32) bits
2021-04-19 10:40:20 +08:00
{ name : "RORconst" , argLength : 1 , reg : gp11 , asm : "ROR" , aux : "Int64" } , // arg0 right rotate by auxInt bits, auxInt should be in the range 0 to 63.
{ name : "RORWconst" , argLength : 1 , reg : gp11 , asm : "RORW" , aux : "Int64" } , // uint32(arg0) right rotate by auxInt bits, auxInt should be in the range 0 to 31.
{ name : "EXTRconst" , argLength : 2 , reg : gp21 , asm : "EXTR" , aux : "Int64" } , // extract 64 bits from arg0:arg1 starting at lsb auxInt, auxInt should be in the range 0 to 63.
{ name : "EXTRWconst" , argLength : 2 , reg : gp21 , asm : "EXTRW" , aux : "Int64" } , // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits, auxInt should be in the range 0 to 31.
2016-07-22 06:41:14 -04:00
2016-07-21 12:42:49 -04:00
// comparisons
{ name : "CMP" , argLength : 2 , reg : gp2flags , asm : "CMP" , typ : "Flags" } , // arg0 compare to arg1
2016-07-22 06:41:14 -04:00
{ name : "CMPconst" , argLength : 1 , reg : gp1flags , asm : "CMP" , aux : "Int64" , typ : "Flags" } , // arg0 compare to auxInt
2016-07-21 12:42:49 -04:00
{ name : "CMPW" , argLength : 2 , reg : gp2flags , asm : "CMPW" , typ : "Flags" } , // arg0 compare to arg1, 32 bit
{ name : "CMPWconst" , argLength : 1 , reg : gp1flags , asm : "CMPW" , aux : "Int32" , typ : "Flags" } , // arg0 compare to auxInt, 32 bit
2022-01-27 11:26:59 -05:00
{ name : "CMN" , argLength : 2 , reg : gp2flags , asm : "CMN" , typ : "Flags" , commutative : true } , // arg0 compare to -arg1, provided arg1 is not 1<<63
2016-07-22 06:41:14 -04:00
{ name : "CMNconst" , argLength : 1 , reg : gp1flags , asm : "CMN" , aux : "Int64" , typ : "Flags" } , // arg0 compare to -auxInt
2022-01-27 11:26:59 -05:00
{ name : "CMNW" , argLength : 2 , reg : gp2flags , asm : "CMNW" , typ : "Flags" , commutative : true } , // arg0 compare to -arg1, 32 bit, provided arg1 is not 1<<31
2016-07-21 12:42:49 -04:00
{ name : "CMNWconst" , argLength : 1 , reg : gp1flags , asm : "CMNW" , aux : "Int32" , typ : "Flags" } , // arg0 compare to -auxInt, 32 bit
2018-07-19 08:09:13 +00:00
{ name : "TST" , argLength : 2 , reg : gp2flags , asm : "TST" , typ : "Flags" , commutative : true } , // arg0 & arg1 compare to 0
2018-04-24 07:17:40 -04:00
{ name : "TSTconst" , argLength : 1 , reg : gp1flags , asm : "TST" , aux : "Int64" , typ : "Flags" } , // arg0 & auxInt compare to 0
2018-07-19 08:09:13 +00:00
{ name : "TSTW" , argLength : 2 , reg : gp2flags , asm : "TSTW" , typ : "Flags" , commutative : true } , // arg0 & arg1 compare to 0, 32 bit
2018-04-24 07:17:40 -04:00
{ name : "TSTWconst" , argLength : 1 , reg : gp1flags , asm : "TSTW" , aux : "Int32" , typ : "Flags" } , // arg0 & auxInt compare to 0, 32 bit
2016-07-21 12:42:49 -04:00
{ name : "FCMPS" , argLength : 2 , reg : fp2flags , asm : "FCMPS" , typ : "Flags" } , // arg0 compare to arg1, float32
{ name : "FCMPD" , argLength : 2 , reg : fp2flags , asm : "FCMPD" , typ : "Flags" } , // arg0 compare to arg1, float64
cmd/compile: optimize arm64 comparison of x and 0.0 with "FCMP $(0.0), Fn"
Code:
func comp(x float64) bool {return x < 0}
Previous version:
FMOVD "".x(FP), F0
FMOVD ZR, F1
FCMPD F1, F0
CSET MI, R0
MOVB R0, "".~r1+8(FP)
RET (R30)
Optimized version:
FMOVD "".x(FP), F0
FCMPD $(0.0), F0
CSET MI, R0
MOVB R0, "".~r1+8(FP)
RET (R30)
Math package benchmark results:
name old time/op new time/op delta
Acos-8 77.500000ns +- 0% 77.400000ns +- 0% -0.13% (p=0.000 n=9+10)
Acosh-8 98.600000ns +- 0% 98.100000ns +- 0% -0.51% (p=0.000 n=10+9)
Asin-8 67.600000ns +- 0% 66.600000ns +- 0% -1.48% (p=0.000 n=9+10)
Asinh-8 108.000000ns +- 0% 109.000000ns +- 0% +0.93% (p=0.000 n=10+10)
Atan-8 36.788889ns +- 0% 36.000000ns +- 0% -2.14% (p=0.000 n=9+10)
Atanh-8 104.000000ns +- 0% 105.000000ns +- 0% +0.96% (p=0.000 n=10+10)
Atan2-8 67.100000ns +- 0% 66.600000ns +- 0% -0.75% (p=0.000 n=10+10)
Cbrt-8 89.100000ns +- 0% 82.000000ns +- 0% -7.97% (p=0.000 n=10+10)
Erf-8 43.500000ns +- 0% 43.000000ns +- 0% -1.15% (p=0.000 n=10+10)
Erfc-8 49.000000ns +- 0% 48.220000ns +- 0% -1.59% (p=0.000 n=9+10)
Erfinv-8 59.100000ns +- 0% 58.600000ns +- 0% -0.85% (p=0.000 n=10+10)
Erfcinv-8 59.100000ns +- 0% 58.600000ns +- 0% -0.85% (p=0.000 n=10+10)
Expm1-8 56.600000ns +- 0% 56.040000ns +- 0% -0.99% (p=0.000 n=8+10)
Exp2Go-8 97.600000ns +- 0% 99.400000ns +- 0% +1.84% (p=0.000 n=10+10)
Dim-8 2.500000ns +- 0% 2.250000ns +- 0% -10.00% (p=0.000 n=10+10)
Mod-8 108.000000ns +- 0% 106.000000ns +- 0% -1.85% (p=0.000 n=8+8)
Frexp-8 12.000000ns +- 0% 12.500000ns +- 0% +4.17% (p=0.000 n=10+10)
Gamma-8 67.100000ns +- 0% 67.600000ns +- 0% +0.75% (p=0.000 n=10+10)
Hypot-8 17.100000ns +- 0% 17.000000ns +- 0% -0.58% (p=0.002 n=8+10)
Ilogb-8 9.010000ns +- 0% 8.510000ns +- 0% -5.55% (p=0.000 n=10+9)
J1-8 288.000000ns +- 0% 287.000000ns +- 0% -0.35% (p=0.000 n=10+10)
Jn-8 605.000000ns +- 0% 604.000000ns +- 0% -0.17% (p=0.001 n=8+9)
Logb-8 10.600000ns +- 0% 10.500000ns +- 0% -0.94% (p=0.000 n=9+10)
Log2-8 16.500000ns +- 0% 17.000000ns +- 0% +3.03% (p=0.000 n=10+10)
PowFrac-8 232.000000ns +- 0% 233.000000ns +- 0% +0.43% (p=0.000 n=10+10)
Remainder-8 70.600000ns +- 0% 69.600000ns +- 0% -1.42% (p=0.000 n=10+10)
SqrtGoLatency-8 77.600000ns +- 0% 76.600000ns +- 0% -1.29% (p=0.000 n=10+10)
Tanh-8 97.600000ns +- 0% 94.100000ns +- 0% -3.59% (p=0.000 n=10+10)
Y1-8 289.000000ns +- 0% 288.000000ns +- 0% -0.35% (p=0.000 n=10+10)
Yn-8 603.000000ns +- 0% 589.000000ns +- 0% -2.32% (p=0.000 n=10+10)
Change-Id: I6920734f8662b329aa58f5b8e4eeae73b409984d
Reviewed-on: https://go-review.googlesource.com/c/go/+/164719
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2019-02-15 11:21:46 +00:00
{ name : "FCMPS0" , argLength : 1 , reg : fp1flags , asm : "FCMPS" , typ : "Flags" } , // arg0 compare to 0, float32
{ name : "FCMPD0" , argLength : 1 , reg : fp1flags , asm : "FCMPD" , typ : "Flags" } , // arg0 compare to 0, float64
2016-07-21 12:42:49 -04:00
2016-08-10 13:24:03 -04:00
// shifted ops
2021-04-19 10:40:20 +08:00
{ name : "MVNshiftLL" , argLength : 1 , reg : gp11 , asm : "MVN" , aux : "Int64" } , // ^(arg0<<auxInt), auxInt should be in the range 0 to 63.
{ name : "MVNshiftRL" , argLength : 1 , reg : gp11 , asm : "MVN" , aux : "Int64" } , // ^(arg0>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "MVNshiftRA" , argLength : 1 , reg : gp11 , asm : "MVN" , aux : "Int64" } , // ^(arg0>>auxInt), signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "MVNshiftRO" , argLength : 1 , reg : gp11 , asm : "MVN" , aux : "Int64" } , // ^(arg0 ROR auxInt), rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "NEGshiftLL" , argLength : 1 , reg : gp11 , asm : "NEG" , aux : "Int64" } , // -(arg0<<auxInt), auxInt should be in the range 0 to 63.
{ name : "NEGshiftRL" , argLength : 1 , reg : gp11 , asm : "NEG" , aux : "Int64" } , // -(arg0>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "NEGshiftRA" , argLength : 1 , reg : gp11 , asm : "NEG" , aux : "Int64" } , // -(arg0>>auxInt), signed shift, auxInt should be in the range 0 to 63.
{ name : "ADDshiftLL" , argLength : 2 , reg : gp21 , asm : "ADD" , aux : "Int64" } , // arg0 + arg1<<auxInt, auxInt should be in the range 0 to 63.
{ name : "ADDshiftRL" , argLength : 2 , reg : gp21 , asm : "ADD" , aux : "Int64" } , // arg0 + arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "ADDshiftRA" , argLength : 2 , reg : gp21 , asm : "ADD" , aux : "Int64" } , // arg0 + arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63.
{ name : "SUBshiftLL" , argLength : 2 , reg : gp21 , asm : "SUB" , aux : "Int64" } , // arg0 - arg1<<auxInt, auxInt should be in the range 0 to 63.
{ name : "SUBshiftRL" , argLength : 2 , reg : gp21 , asm : "SUB" , aux : "Int64" } , // arg0 - arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "SUBshiftRA" , argLength : 2 , reg : gp21 , asm : "SUB" , aux : "Int64" } , // arg0 - arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63.
{ name : "ANDshiftLL" , argLength : 2 , reg : gp21 , asm : "AND" , aux : "Int64" } , // arg0 & (arg1<<auxInt), auxInt should be in the range 0 to 63.
{ name : "ANDshiftRL" , argLength : 2 , reg : gp21 , asm : "AND" , aux : "Int64" } , // arg0 & (arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "ANDshiftRA" , argLength : 2 , reg : gp21 , asm : "AND" , aux : "Int64" } , // arg0 & (arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "ANDshiftRO" , argLength : 2 , reg : gp21 , asm : "AND" , aux : "Int64" } , // arg0 & (arg1 ROR auxInt), rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "ORshiftLL" , argLength : 2 , reg : gp21 , asm : "ORR" , aux : "Int64" } , // arg0 | arg1<<auxInt, auxInt should be in the range 0 to 63.
{ name : "ORshiftRL" , argLength : 2 , reg : gp21 , asm : "ORR" , aux : "Int64" } , // arg0 | arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "ORshiftRA" , argLength : 2 , reg : gp21 , asm : "ORR" , aux : "Int64" } , // arg0 | arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "ORshiftRO" , argLength : 2 , reg : gp21 , asm : "ORR" , aux : "Int64" } , // arg0 | arg1 ROR auxInt, rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "XORshiftLL" , argLength : 2 , reg : gp21 , asm : "EOR" , aux : "Int64" } , // arg0 ^ arg1<<auxInt, auxInt should be in the range 0 to 63.
{ name : "XORshiftRL" , argLength : 2 , reg : gp21 , asm : "EOR" , aux : "Int64" } , // arg0 ^ arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "XORshiftRA" , argLength : 2 , reg : gp21 , asm : "EOR" , aux : "Int64" } , // arg0 ^ arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "XORshiftRO" , argLength : 2 , reg : gp21 , asm : "EOR" , aux : "Int64" } , // arg0 ^ arg1 ROR auxInt, rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "BICshiftLL" , argLength : 2 , reg : gp21 , asm : "BIC" , aux : "Int64" } , // arg0 &^ (arg1<<auxInt), auxInt should be in the range 0 to 63.
{ name : "BICshiftRL" , argLength : 2 , reg : gp21 , asm : "BIC" , aux : "Int64" } , // arg0 &^ (arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "BICshiftRA" , argLength : 2 , reg : gp21 , asm : "BIC" , aux : "Int64" } , // arg0 &^ (arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "BICshiftRO" , argLength : 2 , reg : gp21 , asm : "BIC" , aux : "Int64" } , // arg0 &^ (arg1 ROR auxInt), rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "EONshiftLL" , argLength : 2 , reg : gp21 , asm : "EON" , aux : "Int64" } , // arg0 ^ ^(arg1<<auxInt), auxInt should be in the range 0 to 63.
{ name : "EONshiftRL" , argLength : 2 , reg : gp21 , asm : "EON" , aux : "Int64" } , // arg0 ^ ^(arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "EONshiftRA" , argLength : 2 , reg : gp21 , asm : "EON" , aux : "Int64" } , // arg0 ^ ^(arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "EONshiftRO" , argLength : 2 , reg : gp21 , asm : "EON" , aux : "Int64" } , // arg0 ^ ^(arg1 ROR auxInt), rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "ORNshiftLL" , argLength : 2 , reg : gp21 , asm : "ORN" , aux : "Int64" } , // arg0 | ^(arg1<<auxInt), auxInt should be in the range 0 to 63.
{ name : "ORNshiftRL" , argLength : 2 , reg : gp21 , asm : "ORN" , aux : "Int64" } , // arg0 | ^(arg1>>auxInt), unsigned shift, auxInt should be in the range 0 to 63.
{ name : "ORNshiftRA" , argLength : 2 , reg : gp21 , asm : "ORN" , aux : "Int64" } , // arg0 | ^(arg1>>auxInt), signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "ORNshiftRO" , argLength : 2 , reg : gp21 , asm : "ORN" , aux : "Int64" } , // arg0 | ^(arg1 ROR auxInt), rotate right, auxInt should be in the range 0 to 63.
2021-04-19 10:40:20 +08:00
{ name : "CMPshiftLL" , argLength : 2 , reg : gp2flags , asm : "CMP" , aux : "Int64" , typ : "Flags" } , // arg0 compare to arg1<<auxInt, auxInt should be in the range 0 to 63.
{ name : "CMPshiftRL" , argLength : 2 , reg : gp2flags , asm : "CMP" , aux : "Int64" , typ : "Flags" } , // arg0 compare to arg1>>auxInt, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "CMPshiftRA" , argLength : 2 , reg : gp2flags , asm : "CMP" , aux : "Int64" , typ : "Flags" } , // arg0 compare to arg1>>auxInt, signed shift, auxInt should be in the range 0 to 63.
{ name : "CMNshiftLL" , argLength : 2 , reg : gp2flags , asm : "CMN" , aux : "Int64" , typ : "Flags" } , // (arg0 + arg1<<auxInt) compare to 0, auxInt should be in the range 0 to 63.
{ name : "CMNshiftRL" , argLength : 2 , reg : gp2flags , asm : "CMN" , aux : "Int64" , typ : "Flags" } , // (arg0 + arg1>>auxInt) compare to 0, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "CMNshiftRA" , argLength : 2 , reg : gp2flags , asm : "CMN" , aux : "Int64" , typ : "Flags" } , // (arg0 + arg1>>auxInt) compare to 0, signed shift, auxInt should be in the range 0 to 63.
{ name : "TSTshiftLL" , argLength : 2 , reg : gp2flags , asm : "TST" , aux : "Int64" , typ : "Flags" } , // (arg0 & arg1<<auxInt) compare to 0, auxInt should be in the range 0 to 63.
{ name : "TSTshiftRL" , argLength : 2 , reg : gp2flags , asm : "TST" , aux : "Int64" , typ : "Flags" } , // (arg0 & arg1>>auxInt) compare to 0, unsigned shift, auxInt should be in the range 0 to 63.
{ name : "TSTshiftRA" , argLength : 2 , reg : gp2flags , asm : "TST" , aux : "Int64" , typ : "Flags" } , // (arg0 & arg1>>auxInt) compare to 0, signed shift, auxInt should be in the range 0 to 63.
2021-09-19 13:51:37 -07:00
{ name : "TSTshiftRO" , argLength : 2 , reg : gp2flags , asm : "TST" , aux : "Int64" , typ : "Flags" } , // (arg0 & arg1 ROR auxInt) compare to 0, rotate right, auxInt should be in the range 0 to 63.
2016-08-10 13:24:03 -04:00
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// bitfield ops
// for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff
// insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0
2020-02-17 17:47:34 -08:00
{ name : "BFI" , argLength : 2 , reg : gp21nog , asm : "BFI" , aux : "ARM64BitField" , resultInArg0 : true } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0
2020-02-17 17:47:34 -08:00
{ name : "BFXIL" , argLength : 2 , reg : gp21nog , asm : "BFXIL" , aux : "ARM64BitField" , resultInArg0 : true } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed
2020-02-17 17:47:34 -08:00
{ name : "SBFIZ" , argLength : 1 , reg : gp11 , asm : "SBFIZ" , aux : "ARM64BitField" } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield
2020-02-17 17:47:34 -08:00
{ name : "SBFX" , argLength : 1 , reg : gp11 , asm : "SBFX" , aux : "ARM64BitField" } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed
2020-02-17 17:47:34 -08:00
{ name : "UBFIZ" , argLength : 1 , reg : gp11 , asm : "UBFIZ" , aux : "ARM64BitField" } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
// extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed
2020-02-17 17:47:34 -08:00
{ name : "UBFX" , argLength : 1 , reg : gp11 , asm : "UBFX" , aux : "ARM64BitField" } ,
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-02-21 16:15:39 -05:00
2016-07-21 12:42:49 -04:00
// moves
2020-04-30 12:41:28 +02:00
{ name : "MOVDconst" , argLength : 0 , reg : gp01 , aux : "Int64" , asm : "MOVD" , typ : "UInt64" , rematerializeable : true } , // 64 bits from auxint
2016-07-21 12:42:49 -04:00
{ name : "FMOVSconst" , argLength : 0 , reg : fp01 , aux : "Float64" , asm : "FMOVS" , typ : "Float32" , rematerializeable : true } , // auxint as 64-bit float, convert to 32-bit float
{ name : "FMOVDconst" , argLength : 0 , reg : fp01 , aux : "Float64" , asm : "FMOVD" , typ : "Float64" , rematerializeable : true } , // auxint as 64-bit float
2017-03-09 14:46:43 -08:00
{ name : "MOVDaddr" , argLength : 1 , reg : regInfo { inputs : [ ] regMask { buildReg ( "SP" ) | buildReg ( "SB" ) } , outputs : [ ] regMask { gp } } , aux : "SymOff" , asm : "MOVD" , rematerializeable : true , symEffect : "Addr" } , // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB
2016-07-21 12:42:49 -04:00
2024-11-17 16:42:37 -08:00
{ name : "MOVBload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVB" , typ : "Int8" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVBUload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVBU" , typ : "UInt8" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVHload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVH" , typ : "Int16" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVHUload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVHU" , typ : "UInt16" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVWload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVW" , typ : "Int32" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVWUload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVWU" , typ : "UInt32" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "MOVDload" , argLength : 2 , reg : gpload , aux : "SymOff" , asm : "MOVD" , typ : "UInt64" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "FMOVSload" , argLength : 2 , reg : fpload , aux : "SymOff" , asm : "FMOVS" , typ : "Float32" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
{ name : "FMOVDload" , argLength : 2 , reg : fpload , aux : "SymOff" , asm : "FMOVD" , typ : "Float64" , faultOnNilArg0 : true , symEffect : "Read" } , // load from arg0 + auxInt + aux. arg1=mem.
// LDP instructions load the contents of two adjacent locations in memory into registers.
// Address to start loading is addr = arg0 + auxInt + aux.
// x := *(*T)(addr)
// y := *(*T)(addr+sizeof(T))
// arg1=mem
// Returns the tuple <x,y>.
{ name : "LDP" , argLength : 2 , reg : gpload2 , aux : "SymOff" , asm : "LDP" , typ : "(UInt64,UInt64)" , faultOnNilArg0 : true , symEffect : "Read" } , // T=int64 (gp reg destination)
2025-02-14 16:13:44 -08:00
{ name : "LDPW" , argLength : 2 , reg : gpload2 , aux : "SymOff" , asm : "LDPW" , typ : "(UInt32,UInt32)" , faultOnNilArg0 : true , symEffect : "Read" } , // T=int32 (gp reg destination) unsigned extension
{ name : "LDPSW" , argLength : 2 , reg : gpload2 , aux : "SymOff" , asm : "LDPSW" , typ : "(Int32,Int32)" , faultOnNilArg0 : true , symEffect : "Read" } , // T=int32 (gp reg destination) signed extension
2024-11-17 16:42:37 -08:00
{ name : "FLDPD" , argLength : 2 , reg : fpload2 , aux : "SymOff" , asm : "FLDPD" , typ : "(Float64,Float64)" , faultOnNilArg0 : true , symEffect : "Read" } , // T=float64 (fp reg destination)
{ name : "FLDPS" , argLength : 2 , reg : fpload2 , aux : "SymOff" , asm : "FLDPS" , typ : "(Float32,Float32)" , faultOnNilArg0 : true , symEffect : "Read" } , // T=float32 (fp reg destination)
2016-09-13 17:01:01 -07:00
2018-04-16 14:04:26 +00:00
// register indexed load
2018-07-18 09:31:35 +00:00
{ name : "MOVDloadidx" , argLength : 3 , reg : gp2load , asm : "MOVD" , typ : "UInt64" } , // load 64-bit dword from arg0 + arg1, arg2 = mem.
{ name : "MOVWloadidx" , argLength : 3 , reg : gp2load , asm : "MOVW" , typ : "Int32" } , // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{ name : "MOVWUloadidx" , argLength : 3 , reg : gp2load , asm : "MOVWU" , typ : "UInt32" } , // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{ name : "MOVHloadidx" , argLength : 3 , reg : gp2load , asm : "MOVH" , typ : "Int16" } , // load 16-bit half-word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{ name : "MOVHUloadidx" , argLength : 3 , reg : gp2load , asm : "MOVHU" , typ : "UInt16" } , // load 16-bit half-word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{ name : "MOVBloadidx" , argLength : 3 , reg : gp2load , asm : "MOVB" , typ : "Int8" } , // load 8-bit byte from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{ name : "MOVBUloadidx" , argLength : 3 , reg : gp2load , asm : "MOVBU" , typ : "UInt8" } , // load 8-bit byte from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{ name : "FMOVSloadidx" , argLength : 3 , reg : fp2load , asm : "FMOVS" , typ : "Float32" } , // load 32-bit float from arg0 + arg1, arg2=mem.
{ name : "FMOVDloadidx" , argLength : 3 , reg : fp2load , asm : "FMOVD" , typ : "Float64" } , // load 64-bit float from arg0 + arg1, arg2=mem.
2018-04-22 00:51:00 +00:00
// shifted register indexed load
2020-11-27 17:10:33 +02:00
{ name : "MOVHloadidx2" , argLength : 3 , reg : gp2load , asm : "MOVH" , typ : "Int16" } , // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{ name : "MOVHUloadidx2" , argLength : 3 , reg : gp2load , asm : "MOVHU" , typ : "UInt16" } , // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{ name : "MOVWloadidx4" , argLength : 3 , reg : gp2load , asm : "MOVW" , typ : "Int32" } , // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{ name : "MOVWUloadidx4" , argLength : 3 , reg : gp2load , asm : "MOVWU" , typ : "UInt32" } , // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{ name : "MOVDloadidx8" , argLength : 3 , reg : gp2load , asm : "MOVD" , typ : "UInt64" } , // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{ name : "FMOVSloadidx4" , argLength : 3 , reg : fp2load , asm : "FMOVS" , typ : "Float32" } , // load 32-bit float from arg0 + arg1*4, arg2 = mem.
{ name : "FMOVDloadidx8" , argLength : 3 , reg : fp2load , asm : "FMOVD" , typ : "Float64" } , // load 64-bit float from arg0 + arg1*8, arg2 = mem.
2018-04-16 14:04:26 +00:00
2017-03-09 14:46:43 -08:00
{ name : "MOVBstore" , argLength : 3 , reg : gpstore , aux : "SymOff" , asm : "MOVB" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{ name : "MOVHstore" , argLength : 3 , reg : gpstore , aux : "SymOff" , asm : "MOVH" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{ name : "MOVWstore" , argLength : 3 , reg : gpstore , aux : "SymOff" , asm : "MOVW" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{ name : "MOVDstore" , argLength : 3 , reg : gpstore , aux : "SymOff" , asm : "MOVD" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{ name : "FMOVSstore" , argLength : 3 , reg : fpstore , aux : "SymOff" , asm : "FMOVS" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{ name : "FMOVDstore" , argLength : 3 , reg : fpstore , aux : "SymOff" , asm : "FMOVD" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
2016-09-13 17:01:01 -07:00
2024-11-17 16:42:37 -08:00
// STP instructions store the contents of two registers to adjacent locations in memory.
// Address to start storing is addr = arg0 + auxInt + aux.
// *(*T)(addr) = arg1
// *(*T)(addr+sizeof(T)) = arg2
// arg3=mem. Returns mem.
{ name : "STP" , argLength : 4 , reg : gpstore2 , aux : "SymOff" , asm : "STP" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // T=int64 (gp reg source)
{ name : "STPW" , argLength : 4 , reg : gpstore2 , aux : "SymOff" , asm : "STPW" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // T=int32 (gp reg source)
{ name : "FSTPD" , argLength : 4 , reg : fpstore2 , aux : "SymOff" , asm : "FSTPD" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // T=float64 (fp reg source)
{ name : "FSTPS" , argLength : 4 , reg : fpstore2 , aux : "SymOff" , asm : "FSTPS" , typ : "Mem" , faultOnNilArg0 : true , symEffect : "Write" } , // T=float32 (fp reg source)
2018-04-16 14:04:26 +00:00
// register indexed store
2024-11-17 16:42:37 -08:00
{ name : "MOVBstoreidx" , argLength : 4 , reg : gpstore2 , asm : "MOVB" , typ : "Mem" } , // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{ name : "MOVHstoreidx" , argLength : 4 , reg : gpstore2 , asm : "MOVH" , typ : "Mem" } , // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{ name : "MOVWstoreidx" , argLength : 4 , reg : gpstore2 , asm : "MOVW" , typ : "Mem" } , // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{ name : "MOVDstoreidx" , argLength : 4 , reg : gpstore2 , asm : "MOVD" , typ : "Mem" } , // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
{ name : "FMOVSstoreidx" , argLength : 4 , reg : fpstoreidx , asm : "FMOVS" , typ : "Mem" } , // store 32-bit float of arg2 to arg0 + arg1, arg3=mem.
{ name : "FMOVDstoreidx" , argLength : 4 , reg : fpstoreidx , asm : "FMOVD" , typ : "Mem" } , // store 64-bit float of arg2 to arg0 + arg1, arg3=mem.
2018-04-22 00:51:00 +00:00
// shifted register indexed store
2024-11-17 16:42:37 -08:00
{ name : "MOVHstoreidx2" , argLength : 4 , reg : gpstore2 , asm : "MOVH" , typ : "Mem" } , // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{ name : "MOVWstoreidx4" , argLength : 4 , reg : gpstore2 , asm : "MOVW" , typ : "Mem" } , // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{ name : "MOVDstoreidx8" , argLength : 4 , reg : gpstore2 , asm : "MOVD" , typ : "Mem" } , // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{ name : "FMOVSstoreidx4" , argLength : 4 , reg : fpstoreidx , asm : "FMOVS" , typ : "Mem" } , // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
{ name : "FMOVDstoreidx8" , argLength : 4 , reg : fpstoreidx , asm : "FMOVD" , typ : "Mem" } , // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.
2018-04-16 14:04:26 +00:00
2018-01-30 12:16:52 -05:00
{ name : "FMOVDgpfp" , argLength : 1 , reg : gpfp , asm : "FMOVD" } , // move int64 to float64 (no conversion)
{ name : "FMOVDfpgp" , argLength : 1 , reg : fpgp , asm : "FMOVD" } , // move float64 to int64 (no conversion)
2018-07-16 04:45:25 +00:00
{ name : "FMOVSgpfp" , argLength : 1 , reg : gpfp , asm : "FMOVS" } , // move 32bits from int to float reg (no conversion)
{ name : "FMOVSfpgp" , argLength : 1 , reg : fpgp , asm : "FMOVS" } , // move 32bits from float to int reg, zero extend (no conversion)
2018-01-30 12:16:52 -05:00
2016-07-21 12:42:49 -04:00
// conversions
{ name : "MOVBreg" , argLength : 1 , reg : gp11 , asm : "MOVB" } , // move from arg0, sign-extended from byte
{ name : "MOVBUreg" , argLength : 1 , reg : gp11 , asm : "MOVBU" } , // move from arg0, zero-extended from byte
{ name : "MOVHreg" , argLength : 1 , reg : gp11 , asm : "MOVH" } , // move from arg0, sign-extended from half
{ name : "MOVHUreg" , argLength : 1 , reg : gp11 , asm : "MOVHU" } , // move from arg0, zero-extended from half
{ name : "MOVWreg" , argLength : 1 , reg : gp11 , asm : "MOVW" } , // move from arg0, sign-extended from word
{ name : "MOVWUreg" , argLength : 1 , reg : gp11 , asm : "MOVWU" } , // move from arg0, zero-extended from word
{ name : "MOVDreg" , argLength : 1 , reg : gp11 , asm : "MOVD" } , // move from arg0
2016-08-03 09:56:36 -04:00
{ name : "MOVDnop" , argLength : 1 , reg : regInfo { inputs : [ ] regMask { gp } , outputs : [ ] regMask { gp } } , resultInArg0 : true } , // nop, return arg0 in same register
2016-07-21 12:42:49 -04:00
{ name : "SCVTFWS" , argLength : 1 , reg : gpfp , asm : "SCVTFWS" } , // int32 -> float32
{ name : "SCVTFWD" , argLength : 1 , reg : gpfp , asm : "SCVTFWD" } , // int32 -> float64
{ name : "UCVTFWS" , argLength : 1 , reg : gpfp , asm : "UCVTFWS" } , // uint32 -> float32
{ name : "UCVTFWD" , argLength : 1 , reg : gpfp , asm : "UCVTFWD" } , // uint32 -> float64
{ name : "SCVTFS" , argLength : 1 , reg : gpfp , asm : "SCVTFS" } , // int64 -> float32
{ name : "SCVTFD" , argLength : 1 , reg : gpfp , asm : "SCVTFD" } , // int64 -> float64
{ name : "UCVTFS" , argLength : 1 , reg : gpfp , asm : "UCVTFS" } , // uint64 -> float32
{ name : "UCVTFD" , argLength : 1 , reg : gpfp , asm : "UCVTFD" } , // uint64 -> float64
{ name : "FCVTZSSW" , argLength : 1 , reg : fpgp , asm : "FCVTZSSW" } , // float32 -> int32
{ name : "FCVTZSDW" , argLength : 1 , reg : fpgp , asm : "FCVTZSDW" } , // float64 -> int32
{ name : "FCVTZUSW" , argLength : 1 , reg : fpgp , asm : "FCVTZUSW" } , // float32 -> uint32
{ name : "FCVTZUDW" , argLength : 1 , reg : fpgp , asm : "FCVTZUDW" } , // float64 -> uint32
{ name : "FCVTZSS" , argLength : 1 , reg : fpgp , asm : "FCVTZSS" } , // float32 -> int64
{ name : "FCVTZSD" , argLength : 1 , reg : fpgp , asm : "FCVTZSD" } , // float64 -> int64
{ name : "FCVTZUS" , argLength : 1 , reg : fpgp , asm : "FCVTZUS" } , // float32 -> uint64
{ name : "FCVTZUD" , argLength : 1 , reg : fpgp , asm : "FCVTZUD" } , // float64 -> uint64
{ name : "FCVTSD" , argLength : 1 , reg : fp11 , asm : "FCVTSD" } , // float32 -> float64
{ name : "FCVTDS" , argLength : 1 , reg : fp11 , asm : "FCVTDS" } , // float64 -> float32
2018-02-16 09:22:32 -05:00
// floating-point round to integral
{ name : "FRINTAD" , argLength : 1 , reg : fp11 , asm : "FRINTAD" } ,
{ name : "FRINTMD" , argLength : 1 , reg : fp11 , asm : "FRINTMD" } ,
2018-05-22 06:58:32 +00:00
{ name : "FRINTND" , argLength : 1 , reg : fp11 , asm : "FRINTND" } ,
2018-02-16 09:22:32 -05:00
{ name : "FRINTPD" , argLength : 1 , reg : fp11 , asm : "FRINTPD" } ,
{ name : "FRINTZD" , argLength : 1 , reg : fp11 , asm : "FRINTZD" } ,
2017-08-13 22:36:47 +00:00
// conditional instructions; auxint is
// one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.)
cmd/compile: add rewrite rules for conditional instructions on arm64
This CL adds rewrite rules for CSETM, CSINC, CSINV, and CSNEG. By adding
these rules, we can save one instruction.
For example,
func test(cond bool, a int) int {
if cond {
a++
}
return a
}
Before:
MOVD "".a+8(RSP), R0
ADD $1, R0, R1
MOVBU "".cond(RSP), R2
CMPW $0, R2
CSEL NE, R1, R0, R0
After:
MOVBU "".cond(RSP), R0
CMPW $0, R0
MOVD "".a+8(RSP), R0
CSINC EQ, R0, R0, R0
This patch is a copy of CL 285694. Co-authored-by: JunchenLi
<junchen.li@arm.com>
Change-Id: Ic1a79e8b8ece409b533becfcb7950f11e7b76f24
Reviewed-on: https://go-review.googlesource.com/c/go/+/302231
Trust: fannie zhang <Fannie.Zhang@arm.com>
Run-TryBot: fannie zhang <Fannie.Zhang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2021-01-18 14:32:49 +08:00
{ name : "CSEL" , argLength : 3 , reg : gp2flags1 , asm : "CSEL" , aux : "CCop" } , // auxint(flags) ? arg0 : arg1
{ name : "CSEL0" , argLength : 2 , reg : gp1flags1 , asm : "CSEL" , aux : "CCop" } , // auxint(flags) ? arg0 : 0
{ name : "CSINC" , argLength : 3 , reg : gp2flags1 , asm : "CSINC" , aux : "CCop" } , // auxint(flags) ? arg0 : arg1 + 1
{ name : "CSINV" , argLength : 3 , reg : gp2flags1 , asm : "CSINV" , aux : "CCop" } , // auxint(flags) ? arg0 : ^arg1
{ name : "CSNEG" , argLength : 3 , reg : gp2flags1 , asm : "CSNEG" , aux : "CCop" } , // auxint(flags) ? arg0 : -arg1
{ name : "CSETM" , argLength : 1 , reg : readflags , asm : "CSETM" , aux : "CCop" } , // auxint(flags) ? -1 : 0
2016-07-22 06:41:14 -04:00
2025-08-21 17:41:13 +03:00
// conditional comparison instructions; auxint is
// combination of Cond, Nzcv and optional ConstValue
// Behavior:
// If the condition 'Cond' evaluates to true against current flags,
// flags are set to the result of the comparison operation.
// Otherwise, flags are set to the fallback value 'Nzcv'.
{ name : "CCMP" , argLength : 3 , reg : gp2flagsflags , asm : "CCMP" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMP arg0 arg1 else flags = Nzcv
{ name : "CCMN" , argLength : 3 , reg : gp2flagsflags , asm : "CCMN" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMN arg0 arg1 else flags = Nzcv
{ name : "CCMPconst" , argLength : 2 , reg : gp1flagsflags , asm : "CCMP" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMPconst [ConstValue] arg0 else flags = Nzcv
{ name : "CCMNconst" , argLength : 2 , reg : gp1flagsflags , asm : "CCMN" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMNconst [ConstValue] arg0 else flags = Nzcv
{ name : "CCMPW" , argLength : 3 , reg : gp2flagsflags , asm : "CCMPW" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMPW arg0 arg1 else flags = Nzcv
{ name : "CCMNW" , argLength : 3 , reg : gp2flagsflags , asm : "CCMNW" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMNW arg0 arg1 else flags = Nzcv
{ name : "CCMPWconst" , argLength : 2 , reg : gp1flagsflags , asm : "CCMPW" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMPWconst [ConstValue] arg0 else flags = Nzcv
{ name : "CCMNWconst" , argLength : 2 , reg : gp1flagsflags , asm : "CCMNW" , aux : "ARM64ConditionalParams" , typ : "Flag" } , // If Cond then flags = CMNWconst [ConstValue] arg0 else flags = Nzcv
2016-07-21 12:42:49 -04:00
// function calls
2021-05-28 22:23:00 -04:00
{ name : "CALLstatic" , argLength : - 1 , reg : regInfo { clobbers : callerSave } , aux : "CallOff" , clobberFlags : true , call : true } , // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
2021-10-25 11:51:25 -04:00
{ name : "CALLtail" , argLength : - 1 , reg : regInfo { clobbers : callerSave } , aux : "CallOff" , clobberFlags : true , call : true , tailCall : true } , // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
2021-05-28 22:23:00 -04:00
{ name : "CALLclosure" , argLength : - 1 , reg : regInfo { inputs : [ ] regMask { gpsp , buildReg ( "R26" ) , 0 } , clobbers : callerSave } , aux : "CallOff" , clobberFlags : true , call : true } , // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem
{ name : "CALLinter" , argLength : - 1 , reg : regInfo { inputs : [ ] regMask { gp } , clobbers : callerSave } , aux : "CallOff" , clobberFlags : true , call : true } , // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem
2016-07-21 12:42:49 -04:00
// pseudo-ops
2025-04-05 08:34:12 -07:00
{ name : "LoweredNilCheck" , argLength : 2 , reg : regInfo { inputs : [ ] regMask { gpg } } , nilCheck : true , faultOnNilArg0 : true } , // panic if arg0 is nil. arg1=mem.
2016-07-21 12:42:49 -04:00
cmd/compile: fix wrong complement for arm64 floating-point comparisons
Consider the following example,
func test(a, b float64, x uint64) uint64 {
if a < b {
x = 0
}
return x
}
func main() {
fmt.Println(test(1, math.NaN(), 123))
}
The output is 0, but the expectation is 123.
This is because the rewrite rule
(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
converts
FCMP NaN, 1
CSEL MI, 0, 123, R0 // if 1 < NaN then R0 = 0 else R0 = 123
to
FCMP NaN, 1
CSEL GE, 123, 0, R0 // if 1 >= NaN then R0 = 123 else R0 = 0
But both 1 < NaN and 1 >= NaN are false. So the output is 0, not 123.
The root cause is arm64Negate not handle negation of floating comparison
correctly. According to the ARM manual, the meaning of MI, GE, and PL
are
MI: Less than
GE: Greater than or equal to
PL: Greater than, equal to, or unordered
Because NaN cannot be compared with other numbers, the result of such
comparison is unordered. So when NaN is involved, unlike integer, the
result of !(a < b) is not a >= b, it is a >= b || a is NaN || b is NaN.
This is exactly what PL means. We add NotLessThanF to represent PL. Then
the negation of LessThanF is NotLessThanF rather than GreaterEqualF. The
same reason for the other floating comparison operations.
Fixes #43619
Change-Id: Ia511b0027ad067436bace9fbfd261dbeaae01bcd
Reviewed-on: https://go-review.googlesource.com/c/go/+/283572
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Keith Randall <khr@golang.org>
2021-01-08 10:20:34 +08:00
{ name : "Equal" , argLength : 1 , reg : readflags } , // bool, true flags encode x==y false otherwise.
{ name : "NotEqual" , argLength : 1 , reg : readflags } , // bool, true flags encode x!=y false otherwise.
{ name : "LessThan" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x<y false otherwise.
{ name : "LessEqual" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x<=y false otherwise.
{ name : "GreaterThan" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x>y false otherwise.
{ name : "GreaterEqual" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x>=y false otherwise.
{ name : "LessThanU" , argLength : 1 , reg : readflags } , // bool, true flags encode unsigned x<y false otherwise.
{ name : "LessEqualU" , argLength : 1 , reg : readflags } , // bool, true flags encode unsigned x<=y false otherwise.
{ name : "GreaterThanU" , argLength : 1 , reg : readflags } , // bool, true flags encode unsigned x>y false otherwise.
{ name : "GreaterEqualU" , argLength : 1 , reg : readflags } , // bool, true flags encode unsigned x>=y false otherwise.
{ name : "LessThanF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x<y false otherwise.
{ name : "LessEqualF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x<=y false otherwise.
{ name : "GreaterThanF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x>y false otherwise.
{ name : "GreaterEqualF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x>=y false otherwise.
{ name : "NotLessThanF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x>=y || x is unordered with y, false otherwise.
{ name : "NotLessEqualF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x>y || x is unordered with y, false otherwise.
{ name : "NotGreaterThanF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x<=y || x is unordered with y, false otherwise.
{ name : "NotGreaterEqualF" , argLength : 1 , reg : readflags } , // bool, true flags encode floating-point x<y || x is unordered with y, false otherwise.
cmd/compile: optimize cmp to cmn under conditions < and >= on arm64
Under the right conditions we can optimize cmp comparisons to cmn
comparisons, such as:
func foo(a, b int) int {
var c int
if a + b < 0 {
c = 1
}
return c
}
Previously it's compiled as:
ADD R1, R0, R1
CMP $0, R1
CSET LT, R0
With this CL it's compiled as:
CMN R1, R0
CSET MI, R0
Here we need to pay attention to the overflow situation of a+b, the MI
flag means N==1, which doesn't honor the overflow flag V, its value
depends only on the sign of the result. So it has the same semantic of
the Go code, so it's correct.
Similarly, this CL also optimizes the case of >= comparison
using the PL conditional flag.
Change-Id: I47179faba5b30cca84ea69bafa2ad5241bf6dfba
Reviewed-on: https://go-review.googlesource.com/c/go/+/476116
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2023-03-14 09:25:07 +08:00
{ name : "LessThanNoov" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x<y but without honoring overflow, false otherwise.
{ name : "GreaterEqualNoov" , argLength : 1 , reg : readflags } , // bool, true flags encode signed x>=y but without honoring overflow, false otherwise.
2025-06-04 17:14:01 -07:00
// medium zeroing
2016-07-22 06:41:14 -04:00
// arg0 = address of memory to zero
// arg1 = mem
2025-06-04 17:14:01 -07:00
// auxint = # of bytes to zero
2016-07-22 06:41:14 -04:00
// returns mem
{
2025-06-04 17:14:01 -07:00
name : "LoweredZero" ,
2016-07-22 06:41:14 -04:00
aux : "Int64" ,
argLength : 2 ,
reg : regInfo {
2025-06-04 17:14:01 -07:00
inputs : [ ] regMask { gp } ,
2016-07-22 06:41:14 -04:00
} ,
2025-06-04 17:14:01 -07:00
faultOnNilArg0 : true ,
2016-07-22 06:41:14 -04:00
} ,
// large zeroing
2025-06-04 17:14:01 -07:00
// arg0 = address of memory to zero
// arg1 = mem
// auxint = # of bytes to zero
2016-07-22 06:41:14 -04:00
// returns mem
{
2025-06-04 17:14:01 -07:00
name : "LoweredZeroLoop" ,
aux : "Int64" ,
argLength : 2 ,
2016-07-22 06:41:14 -04:00
reg : regInfo {
2025-06-04 17:14:01 -07:00
inputs : [ ] regMask { gp } ,
clobbersArg0 : true ,
2016-07-22 06:41:14 -04:00
} ,
2016-09-13 17:01:01 -07:00
faultOnNilArg0 : true ,
2025-06-04 17:14:01 -07:00
needIntTemp : true ,
2016-07-22 06:41:14 -04:00
} ,
2025-06-04 21:49:08 -07:00
// medium copying
// arg0 = address of dst memory
// arg1 = address of src memory
2016-09-27 08:57:02 -04:00
// arg2 = mem
2025-06-04 21:49:08 -07:00
// auxint = # of bytes to copy
2016-09-27 08:57:02 -04:00
// returns mem
{
2025-06-04 21:49:08 -07:00
name : "LoweredMove" ,
2016-09-27 08:57:02 -04:00
aux : "Int64" ,
argLength : 3 ,
reg : regInfo {
2025-06-04 21:49:08 -07:00
inputs : [ ] regMask { gp &^ r24to25 , gp &^ r24to25 } ,
clobbers : r24to25 , // TODO: figure out needIntTemp x2
2016-09-27 08:57:02 -04:00
} ,
2025-06-04 21:49:08 -07:00
faultOnNilArg0 : true ,
faultOnNilArg1 : true ,
2016-09-27 08:57:02 -04:00
} ,
2025-06-04 21:49:08 -07:00
// large copying
// arg0 = address of dst memory
// arg1 = address of src memory
// arg2 = mem
// auxint = # of bytes to copy
2016-07-22 06:41:14 -04:00
// returns mem
{
2025-06-04 21:49:08 -07:00
name : "LoweredMoveLoop" ,
aux : "Int64" ,
argLength : 3 ,
2016-07-22 06:41:14 -04:00
reg : regInfo {
2025-06-04 21:49:08 -07:00
inputs : [ ] regMask { gp &^ r23to25 , gp &^ r23to25 } ,
clobbers : r23to25 , // TODO: figure out needIntTemp x3
clobbersArg0 : true ,
clobbersArg1 : true ,
2016-07-22 06:41:14 -04:00
} ,
2016-09-13 17:01:01 -07:00
faultOnNilArg0 : true ,
faultOnNilArg1 : true ,
2016-07-22 06:41:14 -04:00
} ,
2016-07-21 12:42:49 -04:00
// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
// and sorts it to the very beginning of the block to prevent other
// use of R26 (arm64.REGCTXT, the closure pointer)
2018-02-28 16:30:07 -05:00
{ name : "LoweredGetClosurePtr" , reg : regInfo { outputs : [ ] regMask { buildReg ( "R26" ) } } , zeroWidth : true } ,
2016-07-21 12:42:49 -04:00
2022-11-26 15:03:51 -08:00
// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem
{ name : "LoweredGetCallerSP" , argLength : 1 , reg : gp01 , rematerializeable : true } ,
2017-10-09 15:33:29 -04:00
2018-04-25 08:38:09 +00:00
// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
2024-09-16 14:07:43 -04:00
// I.e., if f calls g "calls" sys.GetCallerPC,
2018-04-25 08:38:09 +00:00
// the result should be the PC within f that g will return to.
// See runtime/stubs.go for a more detailed discussion.
{ name : "LoweredGetCallerPC" , reg : gp01 , rematerializeable : true } ,
2020-06-15 22:52:56 -07:00
// Constant flag value.
// Note: there's an "unordered" outcome for floating-point
2016-07-21 12:42:49 -04:00
// comparisons, but we don't use such a beast yet.
2020-06-15 22:52:56 -07:00
// This op is for temporary use by rewrite rules. It
2016-07-21 12:42:49 -04:00
// cannot appear in the generated assembly.
2020-06-15 22:52:56 -07:00
{ name : "FlagConstant" , aux : "FlagConstant" } ,
2016-07-21 12:42:49 -04:00
// (InvertFlags (CMP a b)) == (CMP b a)
// InvertFlags is a pseudo-op which can't appear in assembly output.
{ name : "InvertFlags" , argLength : 1 } , // reverse direction of arg0
2016-08-29 16:26:57 -04:00
// atomic loads.
2016-09-12 15:24:11 -04:00
// load from arg0. arg1=mem. auxint must be zero.
2016-08-29 16:26:57 -04:00
// returns <value,memory> so they can be properly ordered with other loads.
2016-09-13 17:01:01 -07:00
{ name : "LDAR" , argLength : 2 , reg : gpload , asm : "LDAR" , faultOnNilArg0 : true } ,
2019-03-28 14:58:06 -04:00
{ name : "LDARB" , argLength : 2 , reg : gpload , asm : "LDARB" , faultOnNilArg0 : true } ,
2016-09-13 17:01:01 -07:00
{ name : "LDARW" , argLength : 2 , reg : gpload , asm : "LDARW" , faultOnNilArg0 : true } ,
2016-08-29 16:26:57 -04:00
// atomic stores.
2016-09-12 15:24:11 -04:00
// store arg1 to arg0. arg2=mem. returns memory. auxint must be zero.
2019-10-23 10:20:49 -04:00
{ name : "STLRB" , argLength : 3 , reg : gpstore , asm : "STLRB" , faultOnNilArg0 : true , hasSideEffects : true } ,
2017-02-21 15:22:52 -05:00
{ name : "STLR" , argLength : 3 , reg : gpstore , asm : "STLR" , faultOnNilArg0 : true , hasSideEffects : true } ,
{ name : "STLRW" , argLength : 3 , reg : gpstore , asm : "STLRW" , faultOnNilArg0 : true , hasSideEffects : true } ,
2016-08-29 16:26:57 -04:00
// atomic exchange.
2016-09-12 15:24:11 -04:00
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
2016-08-29 16:26:57 -04:00
// LDAXR (Rarg0), Rout
// STLXR Rarg1, (Rarg0), Rtmp
// CBNZ Rtmp, -2(PC)
2019-10-25 00:51:10 -04:00
{ name : "LoweredAtomicExchange64" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicExchange32" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2024-10-07 20:53:01 +00:00
{ name : "LoweredAtomicExchange8" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2016-08-29 16:26:57 -04:00
2020-11-04 16:18:23 +00:00
// atomic exchange variant.
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
// SWPALD Rarg1, (Rarg0), Rout
{ name : "LoweredAtomicExchange64Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
{ name : "LoweredAtomicExchange32Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
2024-10-07 20:53:01 +00:00
{ name : "LoweredAtomicExchange8Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2020-11-04 16:18:23 +00:00
2016-08-29 16:26:57 -04:00
// atomic add.
2016-09-12 15:24:11 -04:00
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
2016-08-29 16:26:57 -04:00
// LDAXR (Rarg0), Rout
// ADD Rarg1, Rout
// STLXR Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
2019-10-25 00:51:10 -04:00
{ name : "LoweredAtomicAdd64" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicAdd32" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2016-08-29 16:26:57 -04:00
2017-11-03 02:05:28 +00:00
// atomic add variant.
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
// LDADDAL Rarg1, (Rarg0), Rout
// ADD Rarg1, Rout
{ name : "LoweredAtomicAdd64Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
{ name : "LoweredAtomicAdd32Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
2016-08-29 16:26:57 -04:00
// atomic compare and swap.
2016-09-12 15:24:11 -04:00
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
2016-08-29 16:26:57 -04:00
// if *arg0 == arg1 {
// *arg0 = arg2
// return (true, memory)
// } else {
// return (false, memory)
// }
// LDAXR (Rarg0), Rtmp
// CMP Rarg1, Rtmp
// BNE 3(PC)
// STLXR Rarg2, (Rarg0), Rtmp
// CBNZ Rtmp, -4(PC)
// CSET EQ, Rout
2019-10-25 00:51:10 -04:00
{ name : "LoweredAtomicCas64" , argLength : 4 , reg : gpcas , resultNotInArgs : true , clobberFlags : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicCas32" , argLength : 4 , reg : gpcas , resultNotInArgs : true , clobberFlags : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2016-09-12 15:24:11 -04:00
2020-11-04 16:18:23 +00:00
// atomic compare and swap variant.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
// if *arg0 == arg1 {
// *arg0 = arg2
// return (true, memory)
// } else {
// return (false, memory)
// }
// MOV Rarg1, Rtmp
// CASAL Rtmp, (Rarg0), Rarg2
// CMP Rarg1, Rtmp
// CSET EQ, Rout
{ name : "LoweredAtomicCas64Variant" , argLength : 4 , reg : gpcas , resultNotInArgs : true , clobberFlags : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicCas32Variant" , argLength : 4 , reg : gpcas , resultNotInArgs : true , clobberFlags : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
2016-09-12 15:24:11 -04:00
// atomic and/or.
2024-04-10 08:45:02 +00:00
// *arg0 &= (|=) arg1. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
2020-10-16 16:34:52 -04:00
// LDAXR (Rarg0), Rout
2024-04-10 08:45:02 +00:00
// AND/OR Rarg1, Rout, tempReg
// STLXR tempReg, (Rarg0), Rtmp
2016-09-12 15:24:11 -04:00
// CBNZ Rtmp, -3(PC)
2024-04-10 08:45:02 +00:00
{ name : "LoweredAtomicAnd8" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "AND" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
{ name : "LoweredAtomicOr8" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "ORR" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
{ name : "LoweredAtomicAnd64" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "AND" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
{ name : "LoweredAtomicOr64" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "ORR" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
{ name : "LoweredAtomicAnd32" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "AND" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
{ name : "LoweredAtomicOr32" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , asm : "ORR" , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true , needIntTemp : true } ,
2017-11-15 14:54:24 -08:00
2020-11-04 16:18:23 +00:00
// atomic and/or variant.
2024-04-10 08:45:02 +00:00
// *arg0 &= (|=) arg1. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
2020-11-04 16:18:23 +00:00
// AND:
// MVN Rarg1, Rtemp
// LDANDALB Rtemp, (Rarg0), Rout
// OR:
// LDORALB Rarg1, (Rarg0), Rout
2024-04-10 08:45:02 +00:00
{ name : "LoweredAtomicAnd8Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicOr8Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
{ name : "LoweredAtomicAnd64Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicOr64Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
{ name : "LoweredAtomicAnd32Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true , unsafePoint : true } ,
{ name : "LoweredAtomicOr32Variant" , argLength : 3 , reg : gpxchg , resultNotInArgs : true , faultOnNilArg0 : true , hasSideEffects : true } ,
2020-11-04 16:18:23 +00:00
2022-11-01 16:46:43 -07:00
// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
2017-11-15 14:54:24 -08:00
// It saves all GP registers if necessary,
// but clobbers R30 (LR) because it's a call.
2019-06-28 09:30:36 -04:00
// R16 and R17 may be clobbered by linker trampoline.
2022-11-01 16:46:43 -07:00
// Returns a pointer to a write barrier buffer in R25.
{ name : "LoweredWB" , argLength : 1 , reg : regInfo { clobbers : ( callerSave &^ gpg ) | buildReg ( "R16 R17 R30" ) , outputs : [ ] regMask { buildReg ( "R25" ) } } , clobberFlags : true , aux : "Int64" } ,
2019-02-06 14:12:36 -08:00
2025-06-18 15:06:55 -07:00
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{ name : "LoweredPanicBoundsRR" , argLength : 3 , aux : "Int64" , reg : regInfo { inputs : [ ] regMask { first16 , first16 } } , typ : "Mem" , call : true } , // arg0=x, arg1=y, arg2=mem, returns memory.
{ name : "LoweredPanicBoundsRC" , argLength : 2 , aux : "PanicBoundsC" , reg : regInfo { inputs : [ ] regMask { first16 } } , typ : "Mem" , call : true } , // arg0=x, arg1=mem, returns memory.
{ name : "LoweredPanicBoundsCR" , argLength : 2 , aux : "PanicBoundsC" , reg : regInfo { inputs : [ ] regMask { first16 } } , typ : "Mem" , call : true } , // arg0=y, arg1=mem, returns memory.
{ name : "LoweredPanicBoundsCC" , argLength : 1 , aux : "PanicBoundsCC" , reg : regInfo { } , typ : "Mem" , call : true } , // arg0=mem, returns memory.
2021-06-15 14:04:30 +00:00
// Prefetch instruction
// Prefetch the address in arg0 with option aux. arg0=addr, arg1=memory, aux=option.
{ name : "PRFM" , argLength : 2 , aux : "Int64" , reg : prefreg , asm : "PRFM" , hasSideEffects : true } ,
2021-06-11 09:27:09 +00:00
// Publication barrier
{ name : "DMB" , argLength : 1 , aux : "Int64" , asm : "DMB" , hasSideEffects : true } , // Do data barrier. arg0=memory, aux=option.
2024-11-24 15:29:56 -08:00
{ name : "ZERO" , zeroWidth : true , fixedReg : true } , // reads-as-zero register
2016-07-21 12:42:49 -04:00
}
blocks := [ ] blockData {
2019-08-12 20:19:58 +01:00
{ name : "EQ" , controls : 1 } ,
{ name : "NE" , controls : 1 } ,
{ name : "LT" , controls : 1 } ,
{ name : "LE" , controls : 1 } ,
{ name : "GT" , controls : 1 } ,
{ name : "GE" , controls : 1 } ,
{ name : "ULT" , controls : 1 } ,
{ name : "ULE" , controls : 1 } ,
{ name : "UGT" , controls : 1 } ,
{ name : "UGE" , controls : 1 } ,
2020-04-30 11:04:02 +02:00
{ name : "Z" , controls : 1 } , // Control == 0 (take a register instead of flags)
{ name : "NZ" , controls : 1 } , // Control != 0
{ name : "ZW" , controls : 1 } , // Control == 0, 32-bit
{ name : "NZW" , controls : 1 } , // Control != 0, 32-bit
{ name : "TBZ" , controls : 1 , aux : "Int64" } , // Control & (1 << AuxInt) == 0
{ name : "TBNZ" , controls : 1 , aux : "Int64" } , // Control & (1 << AuxInt) != 0
2019-08-12 20:19:58 +01:00
{ name : "FLT" , controls : 1 } ,
{ name : "FLE" , controls : 1 } ,
{ name : "FGT" , controls : 1 } ,
{ name : "FGE" , controls : 1 } ,
cmd/compile: fix incorrect rewriting to if condition
Some ARM64 rewriting rules convert 'comparing to zero' conditions of if
statements to a simplified version utilizing CMN and CMP instructions to
branch over condition flags, in order to save one Add or Sub calculation.
Such optimizations lead to wrong branching in case an overflow/underflow
occurs when executing CMN or CMP.
Fix the issue by introducing new block opcodes that don't honor the
overflow/underflow flag, in the following categories:
Block-Op Meaning ARM condition codes
1. LTnoov less than MI
2. GEnoov greater than or equal PL
3. LEnoov less than or equal MI || EQ
4. GTnoov greater than NEQ & PL
The backend generates two consecutive branch instructions for 'LEnoov'
and 'GTnoov' to model their expected behavior. A slight change to 'gc'
and amd64/386 backends is made to unify the code generation.
Add a test 'TestCondRewrite' as justification, it covers 32 incorrect rules
identified on arm64, more might be needed on other arches, like 32-bit arm.
Add two benchmarks profiling the aforementioned category 1&2 and category
3&4 separately, we expect the first two categories will show performance
improvement and the second will not result in visible regression compared with
the non-optimized version.
This change also updates TestFormats to support using %#x.
Examples showing where the issue comes from:
1: 'if x + 3 < 0' might be converted to:
before:
CMN $3, R0
BGE <else branch> // wrong branch is taken if 'x+3' overflows
after:
CMN $3, R0
BPL <else branch>
2: 'if y - 3 > 0' might be converted to:
before:
CMP $3, R0
BLE <else branch> // wrong branch is taken if 'y-3' underflows
after:
CMP $3, R0
BMI <else branch>
BEQ <else branch>
Benchmark data from different kinds of arm64 servers, 'old' is the non-optimized
version (not the parent commit), generally the optimization version outperforms.
S1:
name old time/op new time/op delta
CondRewrite/SoloJump 13.6ns ± 0% 12.9ns ± 0% -5.15% (p=0.000 n=10+10)
CondRewrite/CombJump 13.8ns ± 1% 12.9ns ± 0% -6.32% (p=0.000 n=10+10)
S2:
name old time/op new time/op delta
CondRewrite/SoloJump 11.6ns ± 0% 10.9ns ± 0% -6.03% (p=0.000 n=10+10)
CondRewrite/CombJump 11.4ns ± 0% 10.8ns ± 1% -5.53% (p=0.000 n=10+10)
S3:
name old time/op new time/op delta
CondRewrite/SoloJump 7.36ns ± 0% 7.50ns ± 0% +1.79% (p=0.000 n=9+10)
CondRewrite/CombJump 7.35ns ± 0% 7.75ns ± 0% +5.51% (p=0.000 n=8+9)
S4:
name old time/op new time/op delta
CondRewrite/SoloJump-224 11.5ns ± 1% 10.9ns ± 0% -4.97% (p=0.000 n=10+10)
CondRewrite/CombJump-224 11.9ns ± 0% 11.5ns ± 0% -2.95% (p=0.000 n=10+10)
S5:
name old time/op new time/op delta
CondRewrite/SoloJump 10.0ns ± 0% 10.0ns ± 0% -0.45% (p=0.000 n=9+10)
CondRewrite/CombJump 9.93ns ± 0% 9.77ns ± 0% -1.53% (p=0.000 n=10+9)
Go1 perf. data:
name old time/op new time/op delta
BinaryTree17 6.29s ± 1% 6.30s ± 1% ~ (p=1.000 n=5+5)
Fannkuch11 5.40s ± 0% 5.40s ± 0% ~ (p=0.841 n=5+5)
FmtFprintfEmpty 97.9ns ± 0% 98.9ns ± 3% ~ (p=0.937 n=4+5)
FmtFprintfString 171ns ± 3% 171ns ± 2% ~ (p=0.754 n=5+5)
FmtFprintfInt 212ns ± 0% 217ns ± 6% +2.55% (p=0.008 n=5+5)
FmtFprintfIntInt 296ns ± 1% 297ns ± 2% ~ (p=0.516 n=5+5)
FmtFprintfPrefixedInt 371ns ± 2% 374ns ± 7% ~ (p=1.000 n=5+5)
FmtFprintfFloat 435ns ± 1% 439ns ± 2% ~ (p=0.056 n=5+5)
FmtManyArgs 1.37µs ± 1% 1.36µs ± 1% ~ (p=0.730 n=5+5)
GobDecode 14.6ms ± 4% 14.4ms ± 4% ~ (p=0.690 n=5+5)
GobEncode 11.8ms ±20% 11.6ms ±15% ~ (p=1.000 n=5+5)
Gzip 507ms ± 0% 491ms ± 0% -3.22% (p=0.008 n=5+5)
Gunzip 73.8ms ± 0% 73.9ms ± 0% ~ (p=0.690 n=5+5)
HTTPClientServer 116µs ± 0% 116µs ± 0% ~ (p=0.686 n=4+4)
JSONEncode 21.8ms ± 1% 21.6ms ± 2% ~ (p=0.151 n=5+5)
JSONDecode 104ms ± 1% 103ms ± 1% -1.08% (p=0.016 n=5+5)
Mandelbrot200 9.53ms ± 0% 9.53ms ± 0% ~ (p=0.421 n=5+5)
GoParse 7.55ms ± 1% 7.51ms ± 1% ~ (p=0.151 n=5+5)
RegexpMatchEasy0_32 158ns ± 0% 158ns ± 0% ~ (all equal)
RegexpMatchEasy0_1K 606ns ± 1% 608ns ± 3% ~ (p=0.937 n=5+5)
RegexpMatchEasy1_32 143ns ± 0% 144ns ± 1% ~ (p=0.095 n=5+4)
RegexpMatchEasy1_1K 927ns ± 2% 944ns ± 2% ~ (p=0.056 n=5+5)
RegexpMatchMedium_32 16.0ns ± 0% 16.0ns ± 0% ~ (all equal)
RegexpMatchMedium_1K 69.3µs ± 2% 69.7µs ± 0% ~ (p=0.690 n=5+5)
RegexpMatchHard_32 3.73µs ± 0% 3.73µs ± 1% ~ (p=0.984 n=5+5)
RegexpMatchHard_1K 111µs ± 1% 110µs ± 0% ~ (p=0.151 n=5+5)
Revcomp 1.91s ±47% 1.77s ±68% ~ (p=1.000 n=5+5)
Template 138ms ± 1% 138ms ± 1% ~ (p=1.000 n=5+5)
TimeParse 787ns ± 2% 785ns ± 1% ~ (p=0.540 n=5+5)
TimeFormat 729ns ± 1% 726ns ± 1% ~ (p=0.151 n=5+5)
Updates #38740
Change-Id: I06c604874acdc1e63e66452dadee5df053045222
Reviewed-on: https://go-review.googlesource.com/c/go/+/233097
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
2020-05-06 09:54:40 +00:00
{ name : "LTnoov" , controls : 1 } , // 'LT' but without honoring overflow
{ name : "LEnoov" , controls : 1 } , // 'LE' but without honoring overflow
{ name : "GTnoov" , controls : 1 } , // 'GT' but without honoring overflow
{ name : "GEnoov" , controls : 1 } , // 'GE' but without honoring overflow
2022-04-25 17:18:19 -04:00
// JUMPTABLE implements jump tables.
// Aux is the symbol (an *obj.LSym) for the jump table.
// control[0] is the index into the jump table.
// control[1] is the address of the jump table (the address of the symbol stored in Aux).
{ name : "JUMPTABLE" , controls : 2 , aux : "Sym" } ,
2016-07-21 12:42:49 -04:00
}
archs = append ( archs , arch {
2021-06-01 16:57:59 -07:00
name : "ARM64" ,
pkg : "cmd/internal/obj/arm64" ,
genfile : "../../arm64/ssa.go" ,
ops : ops ,
blocks : blocks ,
regnames : regNamesARM64 ,
2021-05-25 11:53:04 -04:00
ParamIntRegNames : "R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15" ,
ParamFloatRegNames : "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15" ,
2021-06-01 16:57:59 -07:00
gpregmask : gp ,
fpregmask : fp ,
framepointerreg : - 1 , // not used
linkreg : int8 ( num [ "R30" ] ) ,
2016-07-21 12:42:49 -04:00
} )
}