2019-11-04 04:40:47 +11:00
|
|
|
// Code generated from gen/RISCV64.rules; DO NOT EDIT.
|
|
|
|
|
// generated with: cd gen; go run *.go
|
|
|
|
|
|
|
|
|
|
package ssa
|
|
|
|
|
|
|
|
|
|
import "math"
|
|
|
|
|
|
|
|
|
|
func rewriteValueRISCV64(v *Value) bool {
|
|
|
|
|
switch v.Op {
|
|
|
|
|
case OpAdd16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FADDS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FADDD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAdd8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAddPtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64ADD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAddr:
|
2020-04-12 20:05:14 -07:00
|
|
|
return rewriteValueRISCV64_OpAddr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAnd8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAndB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64AND
|
|
|
|
|
return true
|
2020-03-16 02:51:54 +11:00
|
|
|
case OpAtomicAdd32:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicAdd32
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicAdd64:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicAdd64
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicCompareAndSwap32:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicCas32
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicCompareAndSwap64:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicCas64
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicExchange32:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicExchange32
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicExchange64:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicExchange64
|
|
|
|
|
return true
|
2020-03-16 02:47:40 +11:00
|
|
|
case OpAtomicLoad32:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicLoad32
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicLoad64:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicLoad64
|
|
|
|
|
return true
|
2020-03-16 02:38:43 +11:00
|
|
|
case OpAtomicLoad8:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicLoad8
|
|
|
|
|
return true
|
2020-03-16 02:47:40 +11:00
|
|
|
case OpAtomicLoadPtr:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicLoad64
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicStore32:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicStore32
|
|
|
|
|
return true
|
|
|
|
|
case OpAtomicStore64:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicStore64
|
|
|
|
|
return true
|
2020-03-16 02:38:43 +11:00
|
|
|
case OpAtomicStore8:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicStore8
|
|
|
|
|
return true
|
2020-03-16 02:47:40 +11:00
|
|
|
case OpAtomicStorePtrNoWB:
|
|
|
|
|
v.Op = OpRISCV64LoweredAtomicStore64
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpAvg64u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpAvg64u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpClosureCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLclosure
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom16:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom32:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom64:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCom8:
|
2020-03-03 03:43:02 +11:00
|
|
|
v.Op = OpRISCV64NOT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVHconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVWconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst32F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConst32F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVDconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst64F:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConst64F(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConst8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVBconst
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConstBool:
|
2020-04-20 18:15:50 -04:00
|
|
|
return rewriteValueRISCV64_OpConstBool(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConstNil:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpConstNil(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpConvert:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MOVconvert
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTWS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTLS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32Fto64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32to32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt32to64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTWD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64Fto64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTLD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64to32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTSL
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpCvt64to64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FCVTDL
|
|
|
|
|
return true
|
2020-02-28 17:04:16 -08:00
|
|
|
case OpCvtBoolToUint8:
|
|
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv16u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv16u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32:
|
2020-04-20 18:15:50 -04:00
|
|
|
return rewriteValueRISCV64_OpDiv32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FDIVS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv32u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64DIVUW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64:
|
2020-04-20 18:15:50 -04:00
|
|
|
return rewriteValueRISCV64_OpDiv64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FDIVD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64DIVU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpDiv8u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpDiv8u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FEQS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FEQD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEqB:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEqB(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpEqPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpEqPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetCallerPC:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetCallerPC
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetCallerSP:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetCallerSP
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpGetClosurePtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredGetClosurePtr
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpHmul32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul32u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpHmul32u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULH
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpHmul64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULHU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpInterCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLinter
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsInBounds:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpLess64U
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsNonNil:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpIsNonNil(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpIsSliceInBounds:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpLeq64U
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq16U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq16U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLES
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq32U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq32U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLED
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq64U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq64U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLeq8U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLeq8U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess16U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess16U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLTS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess32U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess32U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SLT
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FLTD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess64U:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SLTU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLess8U:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLess8U(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLoad:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLoad(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLocalAddr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLocalAddr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh16x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh16x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh32x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh32x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh64x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh64x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpLsh8x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpLsh8x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod16u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod16u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod32:
|
2020-04-20 18:15:50 -04:00
|
|
|
return rewriteValueRISCV64_OpMod32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod32u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64REMUW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod64:
|
2020-04-20 18:15:50 -04:00
|
|
|
return rewriteValueRISCV64_OpMod64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod64u:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64REMU
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMod8u:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMod8u(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMove:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMove(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMul16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MULW
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FMULS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64MUL
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FMULD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpMul8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpMul8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg16:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg32:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNEGS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg64:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNEGD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeg8:
|
2020-03-03 03:45:22 +11:00
|
|
|
v.Op = OpRISCV64NEG
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNES
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FNED
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeq8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeq8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeqB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNeqPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpNeqPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNilCheck:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredNilCheck
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpNot:
|
2020-04-26 04:34:34 +10:00
|
|
|
v.Op = OpRISCV64SEQZ
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOffPtr:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpOffPtr(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOr8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpOrB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64OR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpPanicBounds:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpPanicBounds(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64ADD:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64ADD(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64ADDI:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64ADDI(v)
|
2020-03-10 03:31:22 +11:00
|
|
|
case OpRISCV64AND:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64AND(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBUload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVBUreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBUreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVBreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVBstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVBstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVBstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDconst:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDconst(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVDreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVDstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVDstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVDstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHUload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVHUreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHUreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVHreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVHstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVHstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVHstorezero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWUload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWUload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVWUreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWUreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWload:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWload(v)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
case OpRISCV64MOVWreg:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWreg(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRISCV64MOVWstore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWstore(v)
|
2020-03-02 04:26:54 +11:00
|
|
|
case OpRISCV64MOVWstorezero:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64MOVWstorezero(v)
|
2020-03-10 03:31:22 +11:00
|
|
|
case OpRISCV64OR:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64OR(v)
|
|
|
|
|
case OpRISCV64SLL:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SLL(v)
|
|
|
|
|
case OpRISCV64SRA:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SRA(v)
|
|
|
|
|
case OpRISCV64SRL:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SRL(v)
|
2020-03-02 04:23:12 +11:00
|
|
|
case OpRISCV64SUB:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SUB(v)
|
2020-03-02 04:24:35 +11:00
|
|
|
case OpRISCV64SUBW:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64SUBW(v)
|
2020-03-10 03:31:22 +11:00
|
|
|
case OpRISCV64XOR:
|
|
|
|
|
return rewriteValueRISCV64_OpRISCV64XOR(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRotateLeft8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRotateLeft8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRound32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRound64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh16x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh16x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh32x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh32x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh64x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh64x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8Ux8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8Ux8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x16:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x16(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x32:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x32(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x64:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x64(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpRsh8x8:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpRsh8x8(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt16to32:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVHreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt16to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVHreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt32to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVWreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to16:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to32:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSignExt8to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSlicemask:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpSlicemask(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSqrt:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSQRTD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpStaticCall:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64CALLstatic
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpStore:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpStore(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub32F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSUBS
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub64F:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64FSUBD
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSub8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpSubPtr:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64SUB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc16to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc32to16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc32to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpTrunc64to8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpCopy
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpWB:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64LoweredWB
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor16:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor32:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor64:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpXor8:
|
2020-01-23 14:28:04 -08:00
|
|
|
v.Op = OpRISCV64XOR
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZero:
|
2020-01-21 20:53:30 -08:00
|
|
|
return rewriteValueRISCV64_OpZero(v)
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt16to32:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVHUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt16to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVHUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt32to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVWUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to16:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to32:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
case OpZeroExt8to64:
|
2020-10-25 00:32:23 +11:00
|
|
|
v.Op = OpRISCV64MOVBUreg
|
|
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-04-12 20:05:14 -07:00
|
|
|
func rewriteValueRISCV64_OpAddr(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (Addr {sym} base)
|
|
|
|
|
// result: (MOVaddr {sym} [0] base)
|
|
|
|
|
for {
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
base := v_0
|
|
|
|
|
v.reset(OpRISCV64MOVaddr)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(0)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg(base)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpAvg64u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Avg64u <t> x y)
|
|
|
|
|
// result: (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SRLI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64ANDI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpRISCV64AND, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v4.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(v4)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v3)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConst32F(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Const32F [val])
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (FMVSX (MOVWconst [int32(math.Float32bits(val))]))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToFloat32(v.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FMVSX)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int32ToAuxInt(int32(math.Float32bits(val)))
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConst64F(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Const64F [val])
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (FMVDX (MOVDconst [int64(math.Float64bits(val))]))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToFloat64(v.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64FMVDX)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(int64(math.Float64bits(val)))
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
func rewriteValueRISCV64_OpConstBool(v *Value) bool {
|
|
|
|
|
// match: (ConstBool [val])
|
|
|
|
|
// result: (MOVBconst [int8(b2i(val))])
|
|
|
|
|
for {
|
|
|
|
|
val := auxIntToBool(v.AuxInt)
|
|
|
|
|
v.reset(OpRISCV64MOVBconst)
|
|
|
|
|
v.AuxInt = int8ToAuxInt(int8(b2i(val)))
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpConstNil(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (ConstNil)
|
|
|
|
|
// result: (MOVDconst [0])
|
|
|
|
|
for {
|
|
|
|
|
v.reset(OpRISCV64MOVDconst)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
2020-04-20 18:15:50 -04:00
|
|
|
// match: (Div16 x y [false])
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (DIVW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
return false
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv16u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div16u x y)
|
|
|
|
|
// result: (DIVUW (ZeroExt16to32 x) (ZeroExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
func rewriteValueRISCV64_OpDiv32(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (Div32 x y [false])
|
|
|
|
|
// result: (DIVW x y)
|
|
|
|
|
for {
|
|
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
v.reset(OpRISCV64DIVW)
|
|
|
|
|
v.AddArg2(x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpDiv64(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (Div64 x y [false])
|
|
|
|
|
// result: (DIV x y)
|
|
|
|
|
for {
|
|
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
v.reset(OpRISCV64DIV)
|
|
|
|
|
v.AddArg2(x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div8 x y)
|
|
|
|
|
// result: (DIVW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpDiv8u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Div8u x y)
|
|
|
|
|
// result: (DIVUW (ZeroExt8to32 x) (ZeroExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64DIVUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Eq16 x y)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// result: (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Eq32 x y)
|
2020-02-26 03:58:59 +11:00
|
|
|
// result: (SEQZ (SUBW <x.Type> x y))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
2020-02-26 03:58:59 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Eq64 x y)
|
|
|
|
|
// result: (SEQZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Eq8 x y)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// result: (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEqB(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (EqB x y)
|
2020-04-26 04:34:34 +10:00
|
|
|
// result: (SEQZ (XOR <typ.Bool> x y))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2020-04-26 04:34:34 +10:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64XOR, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpEqPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (EqPtr x y)
|
|
|
|
|
// result: (SEQZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SEQZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpHmul32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Hmul32 x y)
|
|
|
|
|
// result: (SRAI [32] (MUL (SignExt32to64 x) (SignExt32to64 y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(32)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpHmul32u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Hmul32u x y)
|
|
|
|
|
// result: (SRLI [32] (MUL (ZeroExt32to64 x) (ZeroExt32to64 y)))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2020-01-23 14:28:04 -08:00
|
|
|
v.reset(OpRISCV64SRLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(32)
|
2020-01-23 14:28:04 -08:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MUL, typ.Int64)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, v2)
|
2020-01-23 14:28:04 -08:00
|
|
|
v.AddArg(v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpIsNonNil(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (IsNonNil p)
|
|
|
|
|
// result: (NeqPtr (MOVDconst) p)
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
p := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNeqPtr)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, p)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq16 x y)
|
|
|
|
|
// result: (Not (Less16 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess16, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq16U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq16U x y)
|
|
|
|
|
// result: (Not (Less16U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess16U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq32 x y)
|
|
|
|
|
// result: (Not (Less32 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess32, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq32U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq32U x y)
|
|
|
|
|
// result: (Not (Less32U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess32U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq64 x y)
|
|
|
|
|
// result: (Not (Less64 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess64, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq64U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq64U x y)
|
|
|
|
|
// result: (Not (Less64U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess64U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq8 x y)
|
|
|
|
|
// result: (Not (Less8 y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess8, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLeq8U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Leq8U x y)
|
|
|
|
|
// result: (Not (Less8U y x))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpNot)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLess8U, typ.Bool)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, x)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less16 x y)
|
|
|
|
|
// result: (SLT (SignExt16to64 x) (SignExt16to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess16U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less16U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt16to64 x) (ZeroExt16to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less32 x y)
|
|
|
|
|
// result: (SLT (SignExt32to64 x) (SignExt32to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess32U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less32U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt32to64 x) (ZeroExt32to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less8 x y)
|
|
|
|
|
// result: (SLT (SignExt8to64 x) (SignExt8to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLT)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLess8U(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Less8U x y)
|
|
|
|
|
// result: (SLTU (ZeroExt8to64 x) (ZeroExt8to64 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SLTU)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLoad(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: t.IsBoolean()
|
|
|
|
|
// result: (MOVBUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(t.IsBoolean()) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: ( is8BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVBload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is8BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: ( is8BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVBUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is8BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is16BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVHload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is16BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is16BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVHUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is16BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is32BitInt(t) && isSigned(t))
|
|
|
|
|
// result: (MOVWload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitInt(t) && isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is32BitInt(t) && !isSigned(t))
|
|
|
|
|
// result: (MOVWUload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitInt(t) && !isSigned(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: (is64BitInt(t) || isPtr(t))
|
|
|
|
|
// result: (MOVDload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is64BitInt(t) || isPtr(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: is32BitFloat(t)
|
|
|
|
|
// result: (FMOVWload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is32BitFloat(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVWload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Load <t> ptr mem)
|
|
|
|
|
// cond: is64BitFloat(t)
|
|
|
|
|
// result: (FMOVDload ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(is64BitFloat(t)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVDload)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(ptr, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLocalAddr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (LocalAddr {sym} base _)
|
|
|
|
|
// result: (MOVaddr {sym} base)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
sym := auxToSym(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
base := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVaddr)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.Aux = symToAux(sym)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(base)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh16x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh16x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh16x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh32x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh32x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh32x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh64x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh64x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh64x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x16 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x32 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Lsh8x64 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpLsh8x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Lsh8x8 <t> x y)
|
|
|
|
|
// result: (AND (SLL <t> x y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
2020-04-20 18:15:50 -04:00
|
|
|
// match: (Mod16 x y [false])
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (REMW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
return false
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod16u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod16u x y)
|
|
|
|
|
// result: (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
func rewriteValueRISCV64_OpMod32(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (Mod32 x y [false])
|
|
|
|
|
// result: (REMW x y)
|
|
|
|
|
for {
|
|
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
v.reset(OpRISCV64REMW)
|
|
|
|
|
v.AddArg2(x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpMod64(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (Mod64 x y [false])
|
|
|
|
|
// result: (REM x y)
|
|
|
|
|
for {
|
|
|
|
|
if auxIntToBool(v.AuxInt) != false {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
v.reset(OpRISCV64REM)
|
|
|
|
|
v.AddArg2(x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod8 x y)
|
|
|
|
|
// result: (REMW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMod8u(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mod8u x y)
|
|
|
|
|
// result: (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64REMUW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMove(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
config := b.Func.Config
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Move [0] _ _ mem)
|
|
|
|
|
// result: mem
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 0 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_2
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [1] dst src mem)
|
|
|
|
|
// result: (MOVBstore dst (MOVBload src mem) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 1 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBload, typ.Int8)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [2] dst src mem)
|
|
|
|
|
// result: (MOVHstore dst (MOVHload src mem) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 2 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHload, typ.Int16)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [4] dst src mem)
|
|
|
|
|
// result: (MOVWstore dst (MOVWload src mem) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 4 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWload, typ.Int32)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [8] dst src mem)
|
|
|
|
|
// result: (MOVDstore dst (MOVDload src mem) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 8 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDload, typ.Int64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(src, mem)
|
|
|
|
|
v.AddArg3(dst, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [s] {t} dst src mem)
|
2020-06-14 00:06:24 +02:00
|
|
|
// cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)
|
|
|
|
|
// result: (DUFFCOPY [16 * (128 - s/8)] dst src mem)
|
|
|
|
|
for {
|
|
|
|
|
s := auxIntToInt64(v.AuxInt)
|
|
|
|
|
t := auxToType(v.Aux)
|
|
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
|
|
|
|
if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64DUFFCOPY)
|
|
|
|
|
v.AuxInt = int64ToAuxInt(16 * (128 - s/8))
|
|
|
|
|
v.AddArg3(dst, src, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Move [s] {t} dst src mem)
|
2020-04-13 01:39:45 +10:00
|
|
|
// cond: (s <= 16 || logLargeCopy(v, s))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
s := auxIntToInt64(v.AuxInt)
|
|
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
dst := v_0
|
|
|
|
|
src := v_1
|
|
|
|
|
mem := v_2
|
2020-04-13 01:39:45 +10:00
|
|
|
if !(s <= 16 || logLargeCopy(v, s)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64LoweredMove)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(t.Alignment())
|
2019-11-04 04:40:47 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADDI, src.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(src)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg4(dst, src, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-04-13 01:39:45 +10:00
|
|
|
return false
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMul16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mul16 x y)
|
|
|
|
|
// result: (MULW (SignExt16to32 x) (SignExt16to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MULW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpMul8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Mul8 x y)
|
|
|
|
|
// result: (MULW (SignExt8to32 x) (SignExt8to32 y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MULW)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
|
|
|
|
|
v1.AddArg(y)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Neq16 x y)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// result: (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Neq32 x y)
|
2020-02-26 03:58:59 +11:00
|
|
|
// result: (SNEZ (SUBW <x.Type> x y))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
2020-02-26 03:58:59 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUBW, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Neq64 x y)
|
|
|
|
|
// result: (SNEZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeq8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Neq8 x y)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// result: (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v2.AddArg(y)
|
|
|
|
|
v0.AddArg2(v1, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpNeqPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (NeqPtr x y)
|
|
|
|
|
// result: (SNEZ (SUB <x.Type> x y))
|
|
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SNEZ)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SUB, x.Type)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpOffPtr(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (OffPtr [off] ptr:(SP))
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(off)
|
|
|
|
|
// result: (MOVaddr [int32(off)] ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt64(v.AuxInt)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if ptr.Op != OpSP || !(is32Bit(off)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVaddr)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(int32(off))
|
2020-01-23 14:28:04 -08:00
|
|
|
v.AddArg(ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-01-23 14:28:04 -08:00
|
|
|
// match: (OffPtr [off] ptr)
|
|
|
|
|
// cond: is32Bit(off)
|
|
|
|
|
// result: (ADDI [off] ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt64(v.AuxInt)
|
2020-01-23 14:28:04 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
if !(is32Bit(off)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(off)
|
2020-01-23 14:28:04 -08:00
|
|
|
v.AddArg(ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-01-23 14:28:04 -08:00
|
|
|
// match: (OffPtr [off] ptr)
|
|
|
|
|
// result: (ADD (MOVDconst [off]) ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt64(v.AuxInt)
|
2020-01-23 14:28:04 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(off)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, ptr)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 0
|
|
|
|
|
// result: (LoweredPanicBoundsA [kind] x y mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
kind := auxIntToInt64(v.AuxInt)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsA)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(kind)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 1
|
|
|
|
|
// result: (LoweredPanicBoundsB [kind] x y mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
kind := auxIntToInt64(v.AuxInt)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 1) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsB)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(kind)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (PanicBounds [kind] x y mem)
|
|
|
|
|
// cond: boundsABI(kind) == 2
|
|
|
|
|
// result: (LoweredPanicBoundsC [kind] x y mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
kind := auxIntToInt64(v.AuxInt)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
|
|
|
|
mem := v_2
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(boundsABI(kind) == 2) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64LoweredPanicBoundsC)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(kind)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(x, y, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2020-03-10 03:31:22 +11:00
|
|
|
// match: (ADD (MOVBconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ADDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVBconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (ADD (MOVHconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ADDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVHconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (ADD (MOVWconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ADDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVWconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (ADD (MOVDconst [val]) x)
|
|
|
|
|
// cond: is32Bit(val)
|
|
|
|
|
// result: (ADDI [val] x)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
if v_0.Op != OpRISCV64MOVDconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
if !(is32Bit(val)) {
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(val)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
return true
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
cmd/compile: use loops to handle commutative ops in rules
Prior to this change, we generated additional rules at rulegen time
for all possible combinations of args to commutative ops.
This is simple and works well, but leads to lots of generated rules.
This in turn has increased the size of the compiler,
made it hard to compile package ssa on small machines,
and provided a disincentive to mark some ops as commutative.
This change reworks how we handle commutative ops.
Instead of generating a rule per argument permutation,
we generate a series of nested loops, one for each commutative op.
Each loop tries both possible argument orderings.
I also considered attempting to canonicalize the inputs to the
rewrite rules. However, because either or both arguments might be
nothing more than an identifier, and because there can be arbitrary
conditions to evaluate during matching, I did not see how to proceed.
The duplicate rule detection now sorts arguments to commutative ops,
so that it can detect commutative-only duplicates.
There may be further optimizations to the new generated code.
In particular, we may not be removing as many bounds checks as before;
I have not investigated deeply. If more work here is needed,
we could do it with more hints or with improvements to the prove pass.
This change has almost no impact on the generated code.
It does not pass toolstash-check, however. In a handful of functions,
for reasons I do not understand, there are minor position changes.
For the entire series ending at this change,
there is negligible compiler performance impact.
The compiler binary shrinks by about 15%,
and package ssa shrinks by about 25%.
Package ssa also compiles ~25% faster with ~25% less memory.
Change-Id: Ia2ee9ceae7be08a17342319d4e31b0bb238a2ee4
Reviewed-on: https://go-review.googlesource.com/c/go/+/213703
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-06 22:24:02 -08:00
|
|
|
break
|
2019-11-04 04:40:47 +11:00
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64ADDI(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (ADDI [c] (MOVaddr [d] {s} x))
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(c+int64(d))
|
|
|
|
|
// result: (MOVaddr [int32(c)+d] {s} x)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt64(v.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
d := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
s := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
x := v_0.Args[0]
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(c + int64(d))) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVaddr)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(int32(c) + d)
|
|
|
|
|
v.Aux = symToAux(s)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (ADDI [0] x)
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 0 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(x)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-03-10 03:31:22 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64AND(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (AND (MOVBconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ANDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVBconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ANDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (AND (MOVHconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ANDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVHconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ANDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (AND (MOVWconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ANDI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVWconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ANDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (AND (MOVDconst [val]) x)
|
|
|
|
|
// cond: is32Bit(val)
|
|
|
|
|
// result: (ANDI [val] x)
|
|
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVDconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
if !(is32Bit(val)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ANDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(val)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBUload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVBUload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBUreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVBUreg x:(MOVBUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBUreg x:(MOVBUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVBUload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVBUload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVBload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVBreg x:(MOVBload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBreg x:(MOVBreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVBload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVBload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off1] {sym} (ADDI [off2] base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVBstore [off1+int32(off2)] {sym} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVBconst [0]) mem)
|
|
|
|
|
// result: (MOVBstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVBconst || auxIntToInt8(v_1.AuxInt) != 0 {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
|
|
|
|
|
// result: (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVBstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVBstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
|
|
|
|
|
// result: (MOVBstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVBstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDconst(v *Value) bool {
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (MOVDconst <t> [c])
|
|
|
|
|
// cond: !is32Bit(c) && int32(c) < 0
|
|
|
|
|
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+1])) (MOVDconst [int64(int32(c))]))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt64(v.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(!is32Bit(c) && int32(c) < 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(32)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(c>>32 + 1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(int64(int32(c)))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDconst <t> [c])
|
|
|
|
|
// cond: !is32Bit(c) && int32(c) >= 0
|
|
|
|
|
// result: (ADD (SLLI <t> [32] (MOVDconst [c>>32+0])) (MOVDconst [int64(int32(c))]))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt64(v.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
if !(!is32Bit(c) && int32(c) >= 0) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADD)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SLLI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(32)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(c>>32 + 0)
|
2019-11-04 04:40:47 +11:00
|
|
|
v0.AddArg(v1)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(int64(int32(c)))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVDload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVDreg x)
|
|
|
|
|
// cond: x.Uses == 1
|
|
|
|
|
// result: (MOVDnop x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if !(x.Uses == 1) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDnop)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDstore [off1] {sym} (ADDI [off2] base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVDstore [off1+int32(off2)] {sym} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
|
|
|
|
|
// result: (MOVDstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVDstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVDstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
|
|
|
|
|
// result: (MOVDstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVDstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVHUload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHUreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVHUreg x:(MOVBUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUreg x:(MOVHUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUreg x:(MOVBUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUreg x:(MOVHUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVHUload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVHUload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVHload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVHreg x:(MOVBload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg x:(MOVBUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg x:(MOVHload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg x:(MOVBreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg x:(MOVBUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg x:(MOVHreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVHload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVHload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstore [off1] {sym} (ADDI [off2] base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVHstore [off1+int32(off2)] {sym} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVHconst [0]) mem)
|
|
|
|
|
// result: (MOVHstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVHconst || auxIntToInt16(v_1.AuxInt) != 0 {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
|
|
|
|
|
// result: (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
|
|
|
|
|
// result: (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
|
|
|
|
|
// result: (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
|
|
|
|
|
// result: (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVHstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVHstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
|
|
|
|
|
// result: (MOVHstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVHstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVWUload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWUload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVWUreg x:(MOVBUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg x:(MOVHUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg x:(MOVWUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg x:(MOVBUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg x:(MOVHUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg x:(MOVWUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVWUload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVWUload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWload [off1] {sym} (ADDI [off2] base) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVWload [off1+int32(off2)] {sym} base mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWload)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(base, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool {
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
b := v.Block
|
|
|
|
|
// match: (MOVWreg x:(MOVBload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVBUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVHload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVHUload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVWload _ _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVBreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVBUreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVBUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVHreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVHreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg x:(MOVWreg _))
|
|
|
|
|
// result: (MOVDreg x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDreg)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem))
|
|
|
|
|
// cond: x.Uses == 1 && clobber(x)
|
|
|
|
|
// result: @x.Block (MOVWload <t> [off] {sym} ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
|
|
|
|
x := v_0
|
|
|
|
|
if x.Op != OpRISCV64MOVWUload {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := auxIntToInt32(x.AuxInt)
|
|
|
|
|
sym := auxToSym(x.Aux)
|
|
|
|
|
mem := x.Args[1]
|
|
|
|
|
ptr := x.Args[0]
|
|
|
|
|
if !(x.Uses == 1 && clobber(x)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b = x.Block
|
|
|
|
|
v0 := b.NewValue0(x.Pos, OpRISCV64MOVWload, t)
|
|
|
|
|
v.copyOf(v0)
|
|
|
|
|
v0.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v0.Aux = symToAux(sym)
|
|
|
|
|
v0.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWstore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
|
|
|
|
|
// result: (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWstore [off1] {sym} (ADDI [off2] base) val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVWstore [off1+int32(off2)] {sym} base val mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
base := v_0.Args[0]
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(base, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
// match: (MOVWstore [off] {sym} ptr (MOVWconst [0]) mem)
|
|
|
|
|
// result: (MOVWstorezero [off] {sym} ptr mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVWconst || auxIntToInt32(v_1.AuxInt) != 0 {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for riscv64.
Also where possible, replace an extension following a load with a different typed
load. This removes almost another 8,000 instructions from the go binary.
Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before
subtraction, rather than zero extending after subtraction. While this appears to
double the number of zero extensions, it often lets us completely eliminate them
as the load can already be performed in a properly typed manner.
As an example, prior to this change runtime.memequal16 was:
0000000000013028 <runtime.memequal16>:
13028: 00813183 ld gp,8(sp)
1302c: 00019183 lh gp,0(gp)
13030: 01013283 ld t0,16(sp)
13034: 00029283 lh t0,0(t0)
13038: 405181b3 sub gp,gp,t0
1303c: 03019193 slli gp,gp,0x30
13040: 0301d193 srli gp,gp,0x30
13044: 0011b193 seqz gp,gp
13048: 00310c23 sb gp,24(sp)
1304c: 00008067 ret
Whereas it now becomes:
0000000000012fa8 <runtime.memequal16>:
12fa8: 00813183 ld gp,8(sp)
12fac: 0001d183 lhu gp,0(gp)
12fb0: 01013283 ld t0,16(sp)
12fb4: 0002d283 lhu t0,0(t0)
12fb8: 405181b3 sub gp,gp,t0
12fbc: 0011b193 seqz gp,gp
12fc0: 00310c23 sb gp,24(sp)
12fc4: 00008067 ret
Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-10-25 01:34:17 +11:00
|
|
|
// match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
|
|
|
|
|
// result: (MOVWstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
|
|
|
|
|
// result: (MOVWstore [off] {sym} ptr x mem)
|
|
|
|
|
for {
|
|
|
|
|
off := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWUreg {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1.Args[0]
|
|
|
|
|
mem := v_2
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v.AuxInt = int32ToAuxInt(off)
|
|
|
|
|
v.Aux = symToAux(sym)
|
|
|
|
|
v.AddArg3(ptr, x, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:26:54 +11:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64MOVWstorezero(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (MOVWstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
|
|
|
|
|
// result: (MOVWstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym1 := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64MOVaddr {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt32(v_0.AuxInt)
|
|
|
|
|
sym2 := auxToSym(v_0.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + off2)
|
|
|
|
|
v.Aux = symToAux(mergeSymTyped(sym1, sym2))
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (MOVWstorezero [off1] {sym} (ADDI [off2] ptr) mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(int64(off1)+off2)
|
|
|
|
|
// result: (MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
|
2020-03-02 04:26:54 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
off1 := auxIntToInt32(v.AuxInt)
|
|
|
|
|
sym := auxToSym(v.Aux)
|
2020-03-02 04:26:54 +11:00
|
|
|
if v_0.Op != OpRISCV64ADDI {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
off2 := auxIntToInt64(v_0.AuxInt)
|
2020-03-02 04:26:54 +11:00
|
|
|
ptr := v_0.Args[0]
|
|
|
|
|
mem := v_1
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(is32Bit(int64(off1) + off2)) {
|
2020-03-02 04:26:54 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstorezero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
|
|
|
|
|
v.Aux = symToAux(sym)
|
2020-03-02 04:26:54 +11:00
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-03-10 03:31:22 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64OR(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (OR (MOVBconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVBconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (OR (MOVHconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVHconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (OR (MOVWconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVWconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64ORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (OR (MOVDconst [val]) x)
|
|
|
|
|
// cond: is32Bit(val)
|
|
|
|
|
// result: (ORI [val] x)
|
|
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVDconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
if !(is32Bit(val)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(val)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SLL x (MOVBconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SLLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SLLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SLL x (MOVHconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SLLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SLLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SLL x (MOVWconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SLLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SLLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SLL x (MOVDconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SLLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SLLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SRA x (MOVBconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRAI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRA x (MOVHconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRAI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRA x (MOVWconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRAI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRA x (MOVDconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRAI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRAI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SRL x (MOVBconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRL x (MOVHconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRL x (MOVWconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SRL x (MOVDconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (SRLI [int64(val&63)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_1.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.reset(OpRISCV64SRLI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val & 63))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-03-02 04:23:12 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64SUB(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SUB x (MOVBconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ADDI [-int64(val)] x)
|
2020-03-02 04:23:12 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_1.AuxInt)
|
2020-03-02 04:23:12 +11:00
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(-int64(val))
|
2020-03-02 04:23:12 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVHconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (ADDI [-int64(val)] x)
|
2020-03-02 04:23:12 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_1.AuxInt)
|
2020-03-02 04:23:12 +11:00
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(-int64(val))
|
2020-03-02 04:23:12 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVWconst [val]))
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: is32Bit(-int64(val))
|
|
|
|
|
// result: (ADDI [-int64(val)] x)
|
2020-03-02 04:23:12 +11:00
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_1.AuxInt)
|
|
|
|
|
if !(is32Bit(-int64(val))) {
|
2020-03-02 04:23:12 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(-int64(val))
|
2020-03-02 04:23:12 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVDconst [val]))
|
|
|
|
|
// cond: is32Bit(-val)
|
|
|
|
|
// result: (ADDI [-val] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
|
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_1.AuxInt)
|
2020-03-02 04:23:12 +11:00
|
|
|
if !(is32Bit(-val)) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(-val)
|
2020-03-02 04:23:12 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:24:35 +11:00
|
|
|
// match: (SUB x (MOVBconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVBconst || auxIntToInt8(v_1.AuxInt) != 0 {
|
2020-03-02 04:24:35 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVHconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVHconst || auxIntToInt16(v_1.AuxInt) != 0 {
|
2020-03-02 04:24:35 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVWconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVWconst || auxIntToInt32(v_1.AuxInt) != 0 {
|
2020-03-02 04:24:35 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB x (MOVDconst [0]))
|
|
|
|
|
// result: x
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
|
2020-03-02 04:24:35 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.copyOf(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-03 03:45:22 +11:00
|
|
|
// match: (SUB (MOVBconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_0.Op != OpRISCV64MOVBconst || auxIntToInt8(v_0.AuxInt) != 0 {
|
2020-03-03 03:45:22 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVHconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_0.Op != OpRISCV64MOVHconst || auxIntToInt16(v_0.AuxInt) != 0 {
|
2020-03-03 03:45:22 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVWconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_0.Op != OpRISCV64MOVWconst || auxIntToInt32(v_0.AuxInt) != 0 {
|
2020-03-03 03:45:22 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (SUB (MOVDconst [0]) x)
|
|
|
|
|
// result: (NEG x)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0.AuxInt) != 0 {
|
2020-03-03 03:45:22 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEG)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:24:35 +11:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
func rewriteValueRISCV64_OpRISCV64SUBW(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (SUBW x (MOVWconst [0]))
|
|
|
|
|
// result: (ADDIW [0] x)
|
|
|
|
|
for {
|
|
|
|
|
x := v_0
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_1.Op != OpRISCV64MOVWconst || auxIntToInt32(v_1.AuxInt) != 0 {
|
2020-03-02 04:24:35 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64ADDIW)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(0)
|
2020-03-02 04:24:35 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-03 03:45:22 +11:00
|
|
|
// match: (SUBW (MOVDconst [0]) x)
|
|
|
|
|
// result: (NEGW x)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if v_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0.AuxInt) != 0 {
|
2020-03-03 03:45:22 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64NEGW)
|
|
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-02 04:23:12 +11:00
|
|
|
return false
|
|
|
|
|
}
|
2020-03-10 03:31:22 +11:00
|
|
|
func rewriteValueRISCV64_OpRISCV64XOR(v *Value) bool {
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
|
|
|
|
// match: (XOR (MOVBconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (XORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVBconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt8(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64XORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (XOR (MOVHconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (XORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVHconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt16(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64XORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (XOR (MOVWconst [val]) x)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (XORI [int64(val)] x)
|
2020-03-10 03:31:22 +11:00
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVWconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt32(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
v.reset(OpRISCV64XORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(int64(val))
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
// match: (XOR (MOVDconst [val]) x)
|
|
|
|
|
// cond: is32Bit(val)
|
|
|
|
|
// result: (XORI [val] x)
|
|
|
|
|
for {
|
|
|
|
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
|
|
|
|
if v_0.Op != OpRISCV64MOVDconst {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
val := auxIntToInt64(v_0.AuxInt)
|
2020-03-10 03:31:22 +11:00
|
|
|
x := v_1
|
|
|
|
|
if !(is32Bit(val)) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64XORI)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(val)
|
2020-03-10 03:31:22 +11:00
|
|
|
v.AddArg(x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft16 <t> x (MOVHconst [c]))
|
|
|
|
|
// result: (Or16 (Lsh16x64 <t> x (MOVHconst [c&15])) (Rsh16Ux64 <t> x (MOVHconst [-c&15])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVHconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt16(v_1.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpOr16)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh16x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int16ToAuxInt(c & 15)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh16Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int16ToAuxInt(-c & 15)
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft32 <t> x (MOVWconst [c]))
|
|
|
|
|
// result: (Or32 (Lsh32x64 <t> x (MOVWconst [c&31])) (Rsh32Ux64 <t> x (MOVWconst [-c&31])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVWconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt32(v_1.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpOr32)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh32x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int32ToAuxInt(c & 31)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int32ToAuxInt(-c & 31)
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft64 <t> x (MOVDconst [c]))
|
|
|
|
|
// result: (Or64 (Lsh64x64 <t> x (MOVDconst [c&63])) (Rsh64Ux64 <t> x (MOVDconst [-c&63])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVDconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt64(v_1.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpOr64)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh64x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(c & 63)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(-c & 63)
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRotateLeft8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (RotateLeft8 <t> x (MOVBconst [c]))
|
|
|
|
|
// result: (Or8 (Lsh8x64 <t> x (MOVBconst [c&7])) (Rsh8Ux64 <t> x (MOVBconst [-c&7])))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2019-11-04 04:40:47 +11:00
|
|
|
if v_1.Op != OpRISCV64MOVBconst {
|
|
|
|
|
break
|
|
|
|
|
}
|
2020-04-20 18:15:50 -04:00
|
|
|
c := auxIntToInt8(v_1.AuxInt)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpOr8)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpLsh8x64, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int8ToAuxInt(c & 7)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRsh8Ux64, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int8ToAuxInt(-c & 7)
|
2020-02-26 11:29:34 -08:00
|
|
|
v2.AddArg2(x, v3)
|
|
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt16to64 x) y) (Neg16 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg16, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh16x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh16x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt16to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt32to64 x) y) (Neg32 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg32, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh32x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh32x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt32to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Rsh64Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> x y) (Neg64 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(x, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpNeg64, t)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Rsh64x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2.AddArg(y)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh64x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh64x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> x (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
|
|
|
|
v1.AddArg(v2)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(y, v1)
|
|
|
|
|
v.AddArg2(x, v0)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux16 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt16to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux32 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt32to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux64 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] y)))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8Ux8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8Ux8 <t> x y)
|
|
|
|
|
// result: (AND (SRL <t> (ZeroExt8to64 x) y) (Neg8 <t> (SLTIU <t> [64] (ZeroExt8to64 y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64AND)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRL, t)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v1.AddArg(x)
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(v1, y)
|
2019-11-04 04:40:47 +11:00
|
|
|
v2 := b.NewValue0(v.Pos, OpNeg8, t)
|
|
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg2(v0, v2)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x16(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x16 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt16to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x32(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x32 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt32to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x64(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x64 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] y))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3.AddArg(y)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpRsh8x8(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Rsh8x8 <t> x y)
|
|
|
|
|
// result: (SRA <t> (SignExt8to64 x) (OR <y.Type> y (ADDI <y.Type> [-1] (SLTIU <y.Type> [64] (ZeroExt8to64 y)))))
|
|
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
|
|
|
|
y := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64SRA)
|
|
|
|
|
v.Type = t
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64)
|
|
|
|
|
v0.AddArg(x)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64OR, y.Type)
|
|
|
|
|
v2 := b.NewValue0(v.Pos, OpRISCV64ADDI, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v2.AuxInt = int64ToAuxInt(-1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v3 := b.NewValue0(v.Pos, OpRISCV64SLTIU, y.Type)
|
2020-04-20 18:15:50 -04:00
|
|
|
v3.AuxInt = int64ToAuxInt(64)
|
2019-11-04 04:40:47 +11:00
|
|
|
v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
|
|
|
|
v4.AddArg(y)
|
|
|
|
|
v3.AddArg(v4)
|
|
|
|
|
v2.AddArg(v3)
|
2020-02-26 11:29:34 -08:00
|
|
|
v1.AddArg2(y, v2)
|
|
|
|
|
v.AddArg2(v0, v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpSlicemask(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
// match: (Slicemask <t> x)
|
2020-03-16 03:29:19 +11:00
|
|
|
// result: (NOT (SRAI <t> [63] (ADDI <t> [-1] x)))
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
|
|
|
|
t := v.Type
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
x := v_0
|
2020-03-16 03:29:19 +11:00
|
|
|
v.reset(OpRISCV64NOT)
|
2020-02-26 04:01:29 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64SRAI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v0.AuxInt = int64ToAuxInt(63)
|
2020-02-26 04:01:29 +11:00
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64ADDI, t)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(-1)
|
2020-02-26 04:01:29 +11:00
|
|
|
v1.AddArg(x)
|
|
|
|
|
v0.AddArg(v1)
|
2019-11-04 04:40:47 +11:00
|
|
|
v.AddArg(v0)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpStore(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_2 := v.Args[2]
|
|
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 1
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (MOVBstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 1) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVBstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 2
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (MOVHstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 2) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVHstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 4 && !is32BitFloat(val.Type)
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (MOVWstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 4 && !is32BitFloat(val.Type)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVWstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 8 && !is64BitFloat(val.Type)
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (MOVDstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 8 && !is64BitFloat(val.Type)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64MOVDstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 4 && is32BitFloat(val.Type)
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (FMOVWstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 4 && is32BitFloat(val.Type)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVWstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Store {t} ptr val mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// cond: t.Size() == 8 && is64BitFloat(val.Type)
|
2019-11-04 04:40:47 +11:00
|
|
|
// result: (FMOVDstore ptr val mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
val := v_1
|
|
|
|
|
mem := v_2
|
2020-04-20 18:15:50 -04:00
|
|
|
if !(t.Size() == 8 && is64BitFloat(val.Type)) {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64FMOVDstore)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, val, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2020-01-21 20:53:30 -08:00
|
|
|
func rewriteValueRISCV64_OpZero(v *Value) bool {
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
v_1 := v.Args[1]
|
|
|
|
|
v_0 := v.Args[0]
|
2019-11-04 04:40:47 +11:00
|
|
|
b := v.Block
|
|
|
|
|
config := b.Func.Config
|
|
|
|
|
typ := &b.Func.Config.Types
|
|
|
|
|
// match: (Zero [0] _ mem)
|
|
|
|
|
// result: mem
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 0 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
mem := v_1
|
2019-10-30 10:29:47 -07:00
|
|
|
v.copyOf(mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [1] ptr mem)
|
|
|
|
|
// result: (MOVBstore ptr (MOVBconst) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 1 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVBstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVBconst, typ.UInt8)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [2] ptr mem)
|
|
|
|
|
// result: (MOVHstore ptr (MOVHconst) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 2 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVHstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVHconst, typ.UInt16)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [4] ptr mem)
|
|
|
|
|
// result: (MOVWstore ptr (MOVWconst) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 4 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVWstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVWconst, typ.UInt32)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [8] ptr mem)
|
|
|
|
|
// result: (MOVDstore ptr (MOVDconst) mem)
|
|
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
if auxIntToInt64(v.AuxInt) != 8 {
|
2019-11-04 04:40:47 +11:00
|
|
|
break
|
|
|
|
|
}
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64MOVDstore)
|
|
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-02-26 11:29:34 -08:00
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [s] {t} ptr mem)
|
2020-06-14 00:06:24 +02:00
|
|
|
// cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice
|
|
|
|
|
// result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
|
|
|
|
|
for {
|
|
|
|
|
s := auxIntToInt64(v.AuxInt)
|
|
|
|
|
t := auxToType(v.Aux)
|
|
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
|
|
|
|
if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
v.reset(OpRISCV64DUFFZERO)
|
|
|
|
|
v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
|
|
|
|
|
v.AddArg2(ptr, mem)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (Zero [s] {t} ptr mem)
|
2020-04-20 18:15:50 -04:00
|
|
|
// result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
2020-04-20 18:15:50 -04:00
|
|
|
s := auxIntToInt64(v.AuxInt)
|
|
|
|
|
t := auxToType(v.Aux)
|
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops
to use loops instead of duplicated code.
However, it loaded args using expressions like
v.Args[i] and v.Args[i^1], which the compiler could
not eliminate bounds for (including with all outstanding
prove CLs).
Also, given a series of separate rewrite rules for the same op,
we generated bounds checks for every rewrite rule, even though
we were repeatedly loading the same set of args.
This change reduces both sets of bounds checks.
Instead of loading v.Args[i] and v.Args[i^1] for commutative loops,
we now preload v.Args[0] and v.Args[1] into local variables,
and then swap them (as needed) in the commutative loop post statement.
And we now load all top level v.Args into local variables
at the beginning of every rewrite rule function.
The second optimization is the more significant,
but the first helps a little, and they play together
nicely from the perspective of generating the code.
This does increase register pressure, but the reduced bounds
checks more than compensate.
Note that the vast majority of rewrite rules evaluated
are not applied, so the prologue is the most important
part of the rewrite rules.
There is one subtle aspect to the new generated code.
Because the top level v.Args are shared across rewrite rules,
and rule evaluation can swap v_0 and v_1, v_0 and v_1
can end up being swapped from one rule to the next.
That is OK, because any time a rule does not get applied,
they will have been swapped exactly twice.
Passes toolstash-check -all.
name old time/op new time/op delta
Template 213ms ± 2% 211ms ± 2% -0.85% (p=0.000 n=92+96)
Unicode 83.5ms ± 2% 83.2ms ± 2% -0.41% (p=0.004 n=95+90)
GoTypes 737ms ± 2% 733ms ± 2% -0.51% (p=0.000 n=91+94)
Compiler 3.45s ± 2% 3.43s ± 2% -0.44% (p=0.000 n=99+100)
SSA 8.54s ± 1% 8.32s ± 2% -2.56% (p=0.000 n=96+99)
Flate 136ms ± 2% 135ms ± 1% -0.47% (p=0.000 n=96+96)
GoParser 169ms ± 1% 168ms ± 1% -0.33% (p=0.000 n=96+93)
Reflect 456ms ± 3% 455ms ± 3% ~ (p=0.261 n=95+94)
Tar 186ms ± 2% 185ms ± 2% -0.48% (p=0.000 n=94+95)
XML 251ms ± 1% 250ms ± 1% -0.51% (p=0.000 n=91+94)
[Geo mean] 424ms 421ms -0.68%
name old user-time/op new user-time/op delta
Template 275ms ± 1% 274ms ± 2% -0.55% (p=0.000 n=95+98)
Unicode 118ms ± 4% 118ms ± 4% ~ (p=0.642 n=98+90)
GoTypes 983ms ± 1% 980ms ± 1% -0.30% (p=0.000 n=93+93)
Compiler 4.56s ± 6% 4.52s ± 6% -0.72% (p=0.003 n=100+100)
SSA 11.4s ± 1% 11.1s ± 1% -2.50% (p=0.000 n=96+97)
Flate 168ms ± 1% 167ms ± 1% -0.49% (p=0.000 n=92+92)
GoParser 204ms ± 1% 204ms ± 2% -0.27% (p=0.003 n=99+96)
Reflect 599ms ± 2% 598ms ± 2% ~ (p=0.116 n=95+92)
Tar 227ms ± 2% 225ms ± 2% -0.57% (p=0.000 n=95+98)
XML 313ms ± 2% 312ms ± 1% -0.37% (p=0.000 n=89+95)
[Geo mean] 547ms 544ms -0.61%
file before after Δ %
compile 21113112 21109016 -4096 -0.019%
total 131704940 131700844 -4096 -0.003%
Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-01-20 20:09:41 -08:00
|
|
|
ptr := v_0
|
|
|
|
|
mem := v_1
|
2019-11-04 04:40:47 +11:00
|
|
|
v.reset(OpRISCV64LoweredZero)
|
2020-04-20 18:15:50 -04:00
|
|
|
v.AuxInt = int64ToAuxInt(t.Alignment())
|
2019-11-04 04:40:47 +11:00
|
|
|
v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
|
|
|
|
|
v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
|
2020-04-20 18:15:50 -04:00
|
|
|
v1.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
|
2020-02-26 11:29:34 -08:00
|
|
|
v0.AddArg2(ptr, v1)
|
|
|
|
|
v.AddArg3(ptr, v0, mem)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
func rewriteBlockRISCV64(b *Block) bool {
|
|
|
|
|
switch b.Kind {
|
2020-03-31 02:04:45 +11:00
|
|
|
case BlockRISCV64BEQ:
|
|
|
|
|
// match: (BEQ (MOVDconst [0]) cond yes no)
|
|
|
|
|
// result: (BEQZ cond yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64MOVDconst {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
if auxIntToInt64(v_0.AuxInt) != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
cond := b.Controls[1]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BEQZ, cond)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BEQ cond (MOVDconst [0]) yes no)
|
|
|
|
|
// result: (BEQZ cond yes no)
|
|
|
|
|
for b.Controls[1].Op == OpRISCV64MOVDconst {
|
|
|
|
|
cond := b.Controls[0]
|
|
|
|
|
v_1 := b.Controls[1]
|
|
|
|
|
if auxIntToInt64(v_1.AuxInt) != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b.resetWithControl(BlockRISCV64BEQZ, cond)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
case BlockRISCV64BEQZ:
|
|
|
|
|
// match: (BEQZ (SEQZ x) yes no)
|
|
|
|
|
// result: (BNEZ x yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SEQZ {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BNEZ, x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BEQZ (SNEZ x) yes no)
|
|
|
|
|
// result: (BEQZ x yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SNEZ {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BEQZ, x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BEQZ (SUB x y) yes no)
|
|
|
|
|
// result: (BEQ x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SUB {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BEQ, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BEQZ (SLT x y) yes no)
|
|
|
|
|
// result: (BGE x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SLT {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BGE, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BEQZ (SLTU x y) yes no)
|
|
|
|
|
// result: (BGEU x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SLTU {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BGEU, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
case BlockRISCV64BNE:
|
|
|
|
|
// match: (BNE (MOVDconst [0]) cond yes no)
|
|
|
|
|
// result: (BNEZ cond yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64MOVDconst {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
if auxIntToInt64(v_0.AuxInt) != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
cond := b.Controls[1]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BNEZ, cond)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BNE cond (MOVDconst [0]) yes no)
|
|
|
|
|
// result: (BNEZ cond yes no)
|
|
|
|
|
for b.Controls[1].Op == OpRISCV64MOVDconst {
|
|
|
|
|
cond := b.Controls[0]
|
|
|
|
|
v_1 := b.Controls[1]
|
|
|
|
|
if auxIntToInt64(v_1.AuxInt) != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
b.resetWithControl(BlockRISCV64BNEZ, cond)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-31 02:00:50 +11:00
|
|
|
case BlockRISCV64BNEZ:
|
2020-03-31 02:04:45 +11:00
|
|
|
// match: (BNEZ (SEQZ x) yes no)
|
|
|
|
|
// result: (BEQZ x yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SEQZ {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl(BlockRISCV64BEQZ, x)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2020-03-31 02:00:50 +11:00
|
|
|
// match: (BNEZ (SNEZ x) yes no)
|
|
|
|
|
// result: (BNEZ x yes no)
|
2020-03-02 04:25:54 +11:00
|
|
|
for b.Controls[0].Op == OpRISCV64SNEZ {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
x := v_0.Args[0]
|
2020-03-31 02:00:50 +11:00
|
|
|
b.resetWithControl(BlockRISCV64BNEZ, x)
|
2020-03-02 04:25:54 +11:00
|
|
|
return true
|
|
|
|
|
}
|
2020-03-31 02:04:45 +11:00
|
|
|
// match: (BNEZ (SUB x y) yes no)
|
|
|
|
|
// result: (BNE x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SUB {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BNE, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BNEZ (SLT x y) yes no)
|
|
|
|
|
// result: (BLT x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SLT {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BLT, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// match: (BNEZ (SLTU x y) yes no)
|
|
|
|
|
// result: (BLTU x y yes no)
|
|
|
|
|
for b.Controls[0].Op == OpRISCV64SLTU {
|
|
|
|
|
v_0 := b.Controls[0]
|
|
|
|
|
y := v_0.Args[1]
|
|
|
|
|
x := v_0.Args[0]
|
|
|
|
|
b.resetWithControl2(BlockRISCV64BLTU, x, y)
|
|
|
|
|
return true
|
|
|
|
|
}
|
2019-11-04 04:40:47 +11:00
|
|
|
case BlockIf:
|
|
|
|
|
// match: (If cond yes no)
|
2020-03-31 02:00:50 +11:00
|
|
|
// result: (BNEZ cond yes no)
|
2019-11-04 04:40:47 +11:00
|
|
|
for {
|
|
|
|
|
cond := b.Controls[0]
|
2020-03-31 02:00:50 +11:00
|
|
|
b.resetWithControl(BlockRISCV64BNEZ, cond)
|
2019-11-04 04:40:47 +11:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|